# Discretion in Clinical Decision Making: Evidence from Bunching
# Claire Boone
# Main analyses
# Creates all main figures and tables (Tab 1-4, Fig 1-6)
# Creates appendix: Figs A1-A10, Tables A1 (partial), A2,  A8-A18, A12-A26


# set up -----------------
  rm(list = ls()) 
  library(tidyverse)
  library(magrittr)
  library(bunching)
  library(dplyr)
  library(lfe)
  library(stargazer)
  library(scales)
  library(ggpubr)
  library(ashr)
  library(here)
  
  proj_dir <- here::here()
  raw_data <- file.path(proj_dir, "raw_data/")
  gen_data <- file.path(proj_dir, "gen_data/")
  out <- file.path(proj_dir, "out/")

  
# load main data --------------------
  load(paste0(gen_data,"clinic_bunching.Rda"))
  load(paste0(gen_data,"visits_bunching.Rda"))
  

# load hospitalization data ---------------------------
  ed <- read_delim(paste0(raw_data, 'raw_hospital.csv'), delim = ";")


# clean hospital data -----------------------
  # lowercase
  ed <- data.frame(lapply(ed, function(v) {
    if (is.character(v)) return(tolower(v))
    else return(v)
  }))
  names(ed) <- tolower(names(ed))
  
  ed$fecha_egr <- as.Date(ed$fecha_egr)
  
  ed <- ed[order(ed$id , ed$fecha_egr),]
  rows_n <- tapply(ed$id,ed$id, function(x) seq(1,length(x),1))
  ed$rows_n <- unlist(rows_n)
  
  ed <- ed[order(ed$id, ed$fecha_egr),]
  admit_total <- tapply(ed$rows_n, ed$id, function(x) rep(max(x), length(x)))
  ed$admit_total <- unlist(admit_total)  
  
  ed$serviciosalud[ed$serviciosalud == "null"] <- NA
  ed$seremi[ed$seremi == "null"] <- NA

  ed$serviciosalud <- as.numeric(levels(ed$serviciosalud))[ed$serviciosalud]
  ed$seremi <- as.numeric(levels(ed$seremi))[ed$seremi]

  ed$male <- ed$sexo
  ed[ed$male==2,]$male <- 0  
  ed[ed$male==3 |ed$male==9 ,]$male <- NA

  ed$previ <- as.numeric(levels(ed$previ))[ed$previ]
  ed$previ[ed$previ == 9 | ed$previ == 96 | ed$previ == 99] <- NA

  ed$mod[ed$mod == "null"] <- NA
  ed$mod <- as.numeric(levels(ed$mod))[ed$mod]
  
  ed$comuna <- as.numeric(levels(ed$comuna))[ed$comuna]

  cut_point_top <- quantile(ed$dias_estad, 0.99, na.rm = T)
  ed %<>% mutate(outlier_dias_estad = (dias_estad >= cut_point_top))
  ed$dias_estad_w <- ifelse(ed$outlier_dias_estad, cut_point_top, ed$dias_estad)
  
  # ICD-10 codes
  ed$diag1[ed$diag1 == "null"] <- NA
  ed$diag2[ed$diag2 == "null"] <- NA
  
  ed$death <- ed$cond_egr - 1
  ed$year = as.numeric(format(ed$fecha_egr,'%Y'))
  
  ed = ed %>% as_tibble %>% dplyr::mutate(diag1_cv = if_else(grepl('i|r03|n18', diag1), 1, 0), 
                                          diag2_cv = case_when(!is.na(diag2) ~ if_else(grepl('i|r03|n18', diag2), 1, 0)), 
                                          diag1_htn = if_else(grepl('i10|i11|i12|i13|i15|i50', diag1), 1, 0),
                                          diag2_htn = case_when(!is.na(diag2) ~ if_else(grepl('i10|i11|i12|i13|i15|i50', diag2), 1, 0)), 
                                          diag1_dm = if_else(grepl('e10|i12|n08|n04|n03|n18|z99|e10|e11|z79|', diag1), 1, 0), 
                                          diag2_dm = case_when(!is.na(diag2) ~ if_else(grepl('e10|i12|n08|n04|n03|n18|z99|e10|e11|z79', diag2), 1, 0)),
                                          diag1_missedhtn = if_else(grepl('r03', diag1), 1, 0), 
                                          diag2_missedhtn = case_when(!is.na(diag2) ~ if_else(grepl("r03", diag2), 1, 0)), 
                                          diag1_allminushtn = if_else(!grepl('i63|i20|i21|i22|i24|i25|i50', diag1), 1, 0), 
                                          diag2_allminushtn = case_when(!is.na(diag2) ~ if_else(!grepl('i63|i20|i21|i22|i24|i25|i50', diag2), 1, 0)), 
                                          diag1_stroke = if_else(grepl('i63', diag1), 1, 0),
                                          diag2_stroke = case_when(!is.na(diag2) ~ if_else(grepl('i63', diag2), 1, 0)),
                                          diag1_acs = if_else(grepl('i20|i21|i22|i24|i25', diag1), 1, 0),
                                          diag2_acs = case_when(!is.na(diag2) ~ if_else(grepl('i20|i21|i22|i24|i25', diag2), 1, 0)),
                                          diag1_chf = if_else(grepl('i50', diag1), 1, 0),
                                          diag2_chf = case_when(!is.na(diag2) ~ if_else(grepl('i50', diag2), 1, 0)),
                                          diag1_cvother= if_else((grepl('i', diag1) & !grepl('i63|i20|i21|i22|i24|i25|i50', diag1)), 1, 0),
                                          diag2_cvother = case_when(!is.na(diag2) ~ if_else((grepl('i', diag1) & !grepl('i63|i20|i21|i22|i24|i25|i50', diag2)), 1, 0)))
  
  ed = ed %>% as_tibble %>% dplyr::mutate(cv_hospitalization = if_else(diag1_cv==1 | (!is.na(diag2_cv) & diag2_cv==1), 1, 0), 
                                          htn_hospitalization = if_else(diag1_htn==1 | (!is.na(diag2_htn) & diag2_htn==1), 1, 0), 
                                          dm_hospitalization = if_else(diag1_dm==1 | (!is.na(diag2_dm) & diag2_dm==1), 1, 0),
                                          missedhtn_hospitalization = if_else(diag1_missedhtn==1 | (!is.na(diag2_missedhtn) & diag2_missedhtn==1), 1, 0),
                                          allminushtn = if_else(diag1_allminushtn==1 | (!is.na(diag2_allminushtn) & diag2_allminushtn==1), 1, 0),
                                          cv_stroke = if_else(diag1_stroke==1 | (!is.na(diag2_stroke) & diag2_stroke==1), 1, 0),
                                          cv_acs = if_else(diag1_acs==1 | (!is.na(diag2_acs) & diag2_acs==1), 1, 0),
                                          cv_chf = if_else(diag1_chf==1 | (!is.na(diag2_chf) & diag2_chf==1), 1, 0),
                                          cv_other = if_else(diag1_cvother==1 | (!is.na(diag2_cvother) & diag2_cvother==1), 1, 0))
  
  ed %<>% dplyr::mutate(cvminushtn = if_else(cv_hospitalization==1, 1, 0))
  ed %<>% dplyr::mutate(cvminushtn = if_else(cv_hospitalization==1 & htn_hospitalization==1, 0, cvminushtn))

# save clean hospital data --------
  save(ed, file = paste0(gen_data,"hospital_cleaned.Rda"))
 
# join hospital data to clinic data & clean -----------------------------
  hd %<>% dplyr::filter(fecha_atencion < "2019-01-01") 
  hdd <- hd
  hdd %<>% dplyr::select(id, fecha_atencion)
  
  ed <- dplyr::left_join(ed, hdd, by='id') 
  
  ed %<>% dplyr::filter(!is.na(fecha_atencion)) 
  ed %<>% dplyr::mutate(hosp_after = as.numeric(ifelse(fecha_egr >= fecha_atencion, 1, 0)))
  ed %<>% dplyr::filter(hosp_after==1)

  ed %<>% group_by(id) %<>% dplyr::arrange(fecha_egr) %<>%
    dplyr::mutate(rows_hosp = row_number()) %<>% ungroup() 
  ed$fecha_atencion <- NULL 
 
  hd <- dplyr::full_join(hd, ed, by='id') 
  length(unique(hd$id)) 
  
  hd %<>% dplyr::mutate(hosp_after = ifelse(is.na(hosp_after), 0, hosp_after))
  summary(hd$hosp_after) 
  
  hd %<>% dplyr::mutate(days_to_hosp = as.numeric(fecha_egr - fecha_atencion))
  hd %<>% dplyr::mutate(days_to_hosp = ifelse(is.na(days_to_hosp), 999999, days_to_hosp)) 
  
  hd %<>% dplyr::mutate(any_hosp_after = ifelse(hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(cv_hosp_after = ifelse(cv_hospitalization==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(htn_hosp_after = ifelse(htn_hospitalization==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(dm_hosp_after = ifelse(dm_hospitalization==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(missedhtn_hosp_after = ifelse(missedhtn_hospitalization==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(any_nohtn_after = ifelse(allminushtn==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(cv_nohtn_after = ifelse(cvminushtn==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(cv_stroke_after = ifelse(cv_stroke==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(cv_acs_after = ifelse(cv_acs==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(cv_chf_after  = ifelse(cv_chf==1 & hosp_after==1, 1, 0))
  hd %<>% dplyr::mutate(cv_other_after = ifelse(cv_other==1 & hosp_after==1, 1, 0))
 
  hd %<>% dplyr::mutate(any_hosp_90 = ifelse(hosp_after==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(cv_hosp_90 = ifelse(cv_hospitalization==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(htn_hosp_90 = ifelse(htn_hospitalization==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(dm_hosp_90 = ifelse(dm_hospitalization==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(missedhtn_hosp_90 = ifelse(missedhtn_hosp_after==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(any_nohtn_90 = ifelse(allminushtn==1 & hosp_after==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate( cv_nohtn_90 = ifelse(cvminushtn==1 & hosp_after==1 &  days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(cv_stroke_90 = ifelse(cv_stroke==1 & hosp_after==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(cv_acs_90 = ifelse(cv_acs==1 & hosp_after==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(cv_chf_90  = ifelse(cv_chf==1 & hosp_after==1 & days_to_hosp<=90, 1, 0))
  hd %<>% dplyr::mutate(cv_other_90 = ifelse(cv_other==1 & hosp_after==1 & days_to_hosp<=90, 1, 0))
  
  hd %<>% dplyr::mutate(any_hosp_180 = ifelse(hosp_after==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(cv_hosp_180 = ifelse(cv_hospitalization==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(htn_hosp_180 = ifelse(htn_hospitalization==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(dm_hosp_180 = ifelse(dm_hospitalization==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(missedhtn_hosp_180 = ifelse(missedhtn_hosp_after==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(any_nohtn_180 = ifelse(allminushtn==1 & hosp_after==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate( cv_nohtn_180 = ifelse(cvminushtn==1 & hosp_after==1 &  days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(cv_stroke_180 = ifelse(cv_stroke==1 & hosp_after==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(cv_acs_180 = ifelse(cv_acs==1 & hosp_after==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(cv_chf_180  = ifelse(cv_chf==1 & hosp_after==1 & days_to_hosp<=180, 1, 0))
  hd %<>% dplyr::mutate(cv_other_180 = ifelse(cv_other==1 & hosp_after==1 & days_to_hosp<=180, 1, 0))
  
  hd %<>% dplyr::mutate(any_hosp_365 = ifelse(hosp_after==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(cv_hosp_365 = ifelse(cv_hospitalization==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(htn_hosp_365 = ifelse(htn_hospitalization==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(dm_hosp_365 = ifelse(dm_hospitalization==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(missedhtn_hosp_365 = ifelse(missedhtn_hosp_after==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(any_nohtn_365 = ifelse(allminushtn==1 & hosp_after==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate( cv_nohtn_365 = ifelse(cvminushtn==1 & hosp_after==1 &  days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(cv_stroke_365 = ifelse(cv_stroke==1 & hosp_after==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(cv_acs_365 = ifelse(cv_acs==1 & hosp_after==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(cv_chf_365  = ifelse(cv_chf==1 & hosp_after==1 & days_to_hosp<=365, 1, 0))
  hd %<>% dplyr::mutate(cv_other_365 = ifelse(cv_other==1 & hosp_after==1 & days_to_hosp<=365, 1, 0))
 
  hd %<>% dplyr::group_by(id, any_hosp_365) %<>% dplyr::mutate(rows_any_hosp_365 = row_number()) %<>% ungroup()
  hd %<>% dplyr::group_by(id, cv_hosp_365) %<>% dplyr::mutate(rows_cv_hosp_365 = row_number()) %<>% ungroup()
  hd %<>% dplyr::group_by(id, htn_hosp_365) %<>% dplyr::mutate(rows_htn_hosp_365 = row_number()) %<>% ungroup()
  
  hd %<>% dplyr::group_by(id) %<>% dplyr::mutate(
    any_hosp_after = max(any_hosp_after),
    cv_hosp_after  = max(cv_hosp_after ),
    htn_hosp_after = max(htn_hosp_after),
    dm_hosp_after = max(dm_hosp_after),
    missedhtn_hosp_after = max(missedhtn_hosp_after),
    any_nohtn_after = max(any_nohtn_after),
    cv_nohtn_after = max(cv_nohtn_after),
    cv_stroke_after = max(cv_stroke_after),
    cv_acs_after = max(cv_acs_after),
    cv_chf_after = max(cv_chf_after),
    cv_other_after = max(cv_other_after),
    
    any_hosp_90 = max(any_hosp_90),
    cv_hosp_90  = max(cv_hosp_90 ),
    htn_hosp_90 = max(htn_hosp_90),
    dm_hosp_90 = max(dm_hosp_90),
    missedhtn_hosp_90  = max(missedhtn_hosp_90 ),
    any_nohtn_90 = max(any_nohtn_90),
    cv_nohtn_90 = max(cv_nohtn_90),
    cv_stroke_90 = max(cv_stroke_90),
    cv_acs_90 = max(cv_acs_90),
    cv_chf_90 = max(cv_chf_90),
    cv_other_90 = max(cv_other_90),
    
    any_hosp_180 = max(any_hosp_180),
    cv_hosp_180  = max(cv_hosp_180 ),
    htn_hosp_180 = max(htn_hosp_180),
    dm_hosp_180 = max(dm_hosp_180),
    missedhtn_hosp_180 = max(missedhtn_hosp_180),
    any_nohtn_180 = max(any_nohtn_180),
    cv_nohtn_180 = max(cv_nohtn_180),
    cv_stroke_180 = max(cv_stroke_180),
    cv_acs_180 = max(cv_acs_180),
    cv_chf_180 = max(cv_chf_180),
    cv_other_180 = max(cv_other_180),
    
    any_hosp_365 = max(any_hosp_365),
    cv_hosp_365  = max(cv_hosp_365 ),
    htn_hosp_365 = max(htn_hosp_365),
    dm_hosp_365 = max(dm_hosp_365),
    missedhtn_hosp_365 = max(missedhtn_hosp_365),
    any_nohtn_365 = max(any_nohtn_365),
    cv_nohtn_365 = max(cv_nohtn_365),
    cv_stroke_365 = max(cv_stroke_365),
    cv_acs_365 = max(cv_acs_365),
    cv_chf_365 = max(cv_chf_365),
    cv_other_365 = max(cv_other_365),

    rows_any_hosp_365 = max(rows_any_hosp_365),
    rows_cv_hosp_365 = max(rows_cv_hosp_365),
    rows_htn_hosp_365 = max(rows_htn_hosp_365)
  ) %<>% ungroup()
  
  hd$rows_n.x <- NULL
  hd$rows_n.y <- NULL
  hd$rows_hosp <- NULL
  
  hd %<>% dplyr::arrange(id, days_to_hosp) %<>% dplyr::group_by(id) %<>% dplyr::mutate(rows_hosp = row_number()) %<>% ungroup()
  
  hd %<>% dplyr::filter(rows_hosp==1) 
  length(unique(hd$id))
  
  # patient follow-up time
  hd %<>% dplyr::mutate(followup = as.numeric(as.Date("2018-12-31") - fecha_atencion))
  hd$followup_yrs <- hd$followup/365.25
  summary(hd$followup_yrs)
  
  hd$month <- lubridate::month(hd$fecha_atencion)
  
  hd$age_20s <- ifelse(hd$age_at_visit<30, 1, 0)
  hd$age_30s <- ifelse(hd$age_at_visit>=30 & hd$age_at_visit<40, 1, 0)
  hd$age_40s <- ifelse(hd$age_at_visit>=40 & hd$age_at_visit<50, 1, 0)
  hd$age_50s <- ifelse(hd$age_at_visit>=50 & hd$age_at_visit<60, 1, 0)
  hd$age_60s <- ifelse(hd$age_at_visit>=60 & hd$age_at_visit<70, 1, 0)
  hd$age_70s <- ifelse(hd$age_at_visit>=70 & hd$age_at_visit<80, 1, 0)
  hd$age_80s <- ifelse(hd$age_at_visit>=80 & hd$age_at_visit<90, 1, 0)
  hd$age_90s <- ifelse(hd$age_at_visit>=90, 1, 0)
  
  hd %<>% dplyr::mutate(age_tens = ifelse(age_20s==1, 20, NA))
  hd %<>% dplyr::mutate(age_tens = ifelse(age_30s==1, 30, age_tens))
  hd %<>% dplyr::mutate(age_tens = ifelse(age_40s==1, 40, age_tens))
  hd %<>% dplyr::mutate(age_tens = ifelse(age_50s==1, 50, age_tens))
  hd %<>% dplyr::mutate(age_tens = ifelse(age_60s==1, 60, age_tens))
  hd %<>% dplyr::mutate(age_tens = ifelse(age_70s==1, 70, age_tens))
  hd %<>% dplyr::mutate(age_tens = ifelse(age_80s==1, 80, age_tens))
  hd %<>% dplyr::mutate(age_tens = ifelse(age_90s==1, 90, age_tens))
    
  hd$age_round <- round(hd$age_at_visit, digits=0)
  
  hd$male.y <- NULL 
  hd$male <- hd$male.x
  hd$male.x <- NULL 
  hd %<>% dplyr::mutate(male = ifelse(is.na(male), 0, male))
  hd %<>% dplyr::mutate(diag_dislip10 = ifelse(is.na(diag_dislip), 0, diag_dislip))
  
  hd %<>% dplyr::mutate(risk_hi = ifelse(risk_vhi==1 , 1, risk_hi))
  hd %<>% dplyr::mutate(risk_hi = ifelse(is.na(risk_hi), 0, risk_hi))
  hd %<>% dplyr::mutate(risk_med = ifelse(is.na(risk_med), 0, risk_med))
  hd %<>% dplyr::mutate(risk_low = ifelse(is.na(risk_low), 0, risk_low))
  
  hd %<>% dplyr::mutate(under140 = ifelse(pa_sys<140, 1, 0))
  hd %<>% dplyr::mutate(over140 = ifelse(pa_sys>=140, 1, 0))
  
  hd %<>% dplyr::mutate(bmi_obese23 = ifelse(bmi_obese2 ==1 | bmi_obese3==1, 1, 0))
  
  hd %<>% dplyr::mutate(over40 = ifelse(age_round>=40, 1, 0))
  hd %<>% dplyr::mutate(over50 = ifelse(age_round>=50, 1, 0))
  hd %<>% dplyr::mutate(over60 = ifelse(age_round>=60, 1, 0))
  hd %<>% dplyr::mutate(over70 = ifelse(age_round>=70, 1, 0))
  hd %<>% dplyr::mutate(over80 = ifelse(age_round>=80, 1, 0))
  hd %<>% dplyr::mutate(over90 = ifelse(age_round>=90, 1, 0))
  
  hd %<>% dplyr::mutate(over55 = ifelse(age_at_visit>=55, 1, 0))
  hd %<>% dplyr::mutate(over65 = ifelse(age_at_visit>=65, 1, 0))
  
  hd %<>% dplyr::mutate(highrisk = ifelse(riesgo_cvn>=3, 1, 0))
  hd %<>% dplyr::mutate(highrisk = ifelse(is.na(highrisk), 0, highrisk))
  
  hd$comuna.y <- NULL
  hd$comuna <- hd$comuna.x
  hd$comuna.x <- NULL
  hd %<>% dplyr::mutate(comuna = ifelse(is.na(comuna), 999999, comuna)) 
  
  hd$pro_fe <- hd$pro 
  hd %<>% dplyr::mutate(pro_fe = ifelse(pro_fe=="kinesiologo" | pro_fe=="tecnico paramedico"| is.na(pro_fe), "other", pro_fe))
  
  quantile(hd$pa_dia, c(0.01, 0.05, 0.9, 0.95, 0.99))
  
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia<60, 50, NA))
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia>=60  & hd$pa_dia<70, 60, diastolic_bp))
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia>=70  & hd$pa_dia<80, 70, diastolic_bp))
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia>=80  & hd$pa_dia<90, 80, diastolic_bp))
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia>=90  & hd$pa_dia<100, 90, diastolic_bp))
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia>=100  & hd$pa_dia<110, 100, diastolic_bp))
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia>=110  & hd$pa_dia<120, 110, diastolic_bp))
  hd %<>% dplyr::mutate(diastolic_bp = ifelse(hd$pa_dia>=120, 120, diastolic_bp))

  hd$year <- lubridate::year(hd$fecha_atencion)
  hd$weekday <- lubridate::wday(hd$fecha_atencion)

  # prep comuna and clinic level data
  hd$income <- log(hd$ypch_usd2021)
  hd$employed <- hd$activ_1_mean
  hd$primary_edu <- hd$primary
  hd$secondary_edu <- hd$secondary
  hd$tertiary_edu <- hd$tertiary
  hd$secondary_or_less <- hd$secondary + hd$primary
  hd$literacy <- hd$e1_1_mean
  hd$log_visits <- log(hd$rows_max_clinic)
  hd %<>% dplyr::mutate(literacy = ifelse(hd$literacy>=0.97, 1, 0))
  hd$mean_age <- hd$edad_mean
  
  hd$clinic_type <- 0
  hd %<>% dplyr::mutate(clinic_type = ifelse(type_consrural==1, 1, clinic_type))
  hd %<>% dplyr::mutate(clinic_type = ifelse(type_consurb==1, 2, clinic_type))
  hd %<>% dplyr::mutate(clinic_type = ifelse(type_hospbaja==1, 3, clinic_type))
  
  hd$rows_any_hosp_365 <- hd$rows_any_hosp_365 - 1
  hd$rows_cv_hosp_365 <- hd$rows_cv_hosp_365 - 1
  hd$rows_htn_hosp_365 <- hd$rows_htn_hosp_365 - 1  
  
  hd$quarter <- lubridate::quarter(hd$fecha_atencion)
  
  # clean bmi variables
    hd %<>% dplyr::mutate(bmi_sobese = ifelse(nut_ob2==1|nut_ob3==1|nut_mega==1|nut_obsev==1, 1, 0))
    hd %<>% dplyr::mutate(bmi_sobese = ifelse(is.na(bmi_sobese) & bmi_w>=35, 1, bmi_sobese))
    hd %<>% dplyr::mutate(bmi_sobese = ifelse(is.na(bmi_sobese) & bmi_w<35, 0, bmi_sobese))
    hd %<>% dplyr::mutate(bmi_sobesem = ifelse(is.na(bmi_sobese), mean(bmi_sobese, na.rm=T), bmi_sobese))
   
    hd %<>% dplyr::mutate(bmi_over_ob = ifelse(nut_obese==1 | nut_ob1==1 | nut_over==1, 1, 0))
    hd %<>% dplyr::mutate(bmi_over_ob = ifelse(is.na(bmi_over_ob) & bmi_w>=25 & bmi_w<30, 1, bmi_over_ob))
    hd %<>% dplyr::mutate(bmi_over_ob = ifelse(is.na(bmi_over_ob) & (bmi_w<25 | bmi_w>=30), 0, bmi_over_ob))
    
    hd %<>% dplyr::mutate(bmi_norm_under = ifelse(nut_und==1 | nut_mal==1 | nut_mal2==1 | nut_ris==1 | nut_norm==1, 1, 0))
    hd %<>% dplyr::mutate(bmi_norm_under = ifelse(is.na(bmi_norm_under) & bmi_w<25, 1, bmi_norm_under))
    hd %<>% dplyr::mutate(bmi_norm_under = ifelse(is.na(bmi_norm_under) & bmi_w>=25, 0, bmi_norm_under))

  # clinic type
    hd$clinic_type_rural = ifelse(hd$tipo_est=="consultorio general rural" | hd$tipo_est=="posta de salud rural", 1, 0)
    hd$clinic_type_urban = ifelse(hd$tipo_est=="consultorio general urbano", 1, 0)
    hd$clinic_type_lowmedhosp = ifelse(hd$tipo_est=="establecimiento baja complejidad"| hd$tipo_est=="establecimiento mediana complejidad" |
                                         hd$tipo_est=="centro comunitario de salud familiar", 1, 0)
    
    hd %<>% dplyr::mutate(clinic_type1 = ifelse(clinic_type_rural==1, 0, NA))
    hd %<>% dplyr::mutate(clinic_type1 = ifelse(clinic_type_urban==1, 1, clinic_type1))
    hd %<>% dplyr::mutate(clinic_type1 = ifelse(clinic_type_lowmedhosp==1, 2, clinic_type1))
    
  
    rm(hdd)
    rm(ed)
    rm(hdc)
    rm(rows_n)
    rm(admit_total)
    
# save cleaned primary care + hospitalization data ---------
  save(hd, file = paste0(gen_data,"primary_hospital_cleaned.Rda"))
# join to case survey data -----
  casen <- read.csv(paste0(gen_data, "casen_micro.csv"), check.names=FALSE)
  hd <- left_join(hd, casen, by = "comuna")
  rm(casen)
  
  # clean 
  hd$ypch_usd2015 <- hd$ypch_mean / 711.03
  hd$ypch_usd2021 <- hd$ypch_usd2015 * 1.11
  
  hd$income <- log(hd$ypch_usd2021)
  hd$employed <- hd$activ_1_mean
  hd$primary_edu <- hd$primary
  hd$secondary_edu <- hd$secondary
  hd$tertiary_edu <- hd$tertiary
  hd$secondary_or_less <- hd$secondary + hd$primary
  hd$literacy <- hd$e1_1_mean
  hd %<>% dplyr::mutate(literacy = ifelse(hd$literacy>=0.97, 1, 0))
  hd$mean_age <- hd$edad_mean
  
# create bunching variable -----------------
  c <- hd
  c %<>% dplyr::filter(rows_clinic==1)
  c$b_t_10 <- c$b_10 / c$b_sd_10
  c$abs_b_t_10 <- abs(c$b_t_10)
  c$sig05 <- ifelse(c$abs_b_t_10>=1.96, 1, 0)
  c$negative_b10 <- ifelse(c$b_10<0, 1, 0)
  c$sig05 <- as.factor(c$sig05)
  
  # indicator for significant negative bunching:
  c$sig_neg_bunch <- ifelse(c$sig05==1 & c$negative_b10==1, 1, 0)
  # magnitude of bunching (*-1), among clinics with significant negative bunching
  c$sig_neg_bunch_mag <- ifelse(c$sig05==1 & c$negative_b10==1, (c$b_10)*-1, 0)
  # magnitude of bunching (*-1), among all clinics with negative bunching
  c$neg_bunch_mag <- ifelse(c$negative_b10==1, (c$b_10)*-1, 0)
  # join with hd:
  c %<>% dplyr::select(cod_est, sig_neg_bunch, sig_neg_bunch_mag, neg_bunch_mag)
  
  # magnitude of bunching (*-1), among ALL clinics. normalized to mean 0 sd 1
  c$inv_b_10 <- c$b_10 * -1
  c$norm_bunch <- (c$inv_b_10 - mean(c$inv_b_10, na.rm=T))/sd(c$inv_b_10, na.rm = T)
  c$norm_bunch <- ifelse(c$norm_bunch< (-2.5), -2.5, c$norm_bunch)
 
  # normalize magnitude in sig_neg_bunch_mag
  c$sig_neg_bunch_mag <- (c$sig_neg_bunch_mag - mean(c$sig_neg_bunch_mag, na.rm=T))/sd(c$sig_neg_bunch_mag, na.rm = T)

  # normalize magnitude in neg_bunch_mag
  c$neg_bunch_mag <- (c$neg_bunch_mag - mean(c$neg_bunch_mag, na.rm=T))/sd(c$neg_bunch_mag, na.rm = T)

  hd <- dplyr::left_join(hd, c, by="cod_est")
  
# (FIG 1C) ----------------------------
  c <- hd
  c %<>% dplyr::select(cod_est, pa_sys)
  c %<>% dplyr::filter(cod_est==114307 | cod_est==114317 | cod_est==114714 | 
                       cod_est==110160 |  cod_est==110355 | cod_est==111382)
  
  c$id <- ""
  c %<>% dplyr::mutate(id = ifelse(cod_est==114307, "Clinic A", id))  
  c %<>% dplyr::mutate(id = ifelse(cod_est==114317, "Clinic B", id)) 
  c %<>% dplyr::mutate(id = ifelse(cod_est==114714, "Clinic C", id)) 
  c %<>% dplyr::mutate(id = ifelse(cod_est==110160, "Clinic D", id)) 
  c %<>% dplyr::mutate(id = ifelse(cod_est==110355, "Clinic E", id)) 
  c %<>% dplyr::mutate(id = ifelse(cod_est==111382, "Clinic F", id)) 
  
  c$id <- as.factor(c$id)
  
  (sys <- ggplot(data=c, aes(pa_sys)) + geom_vline(xintercept=140, color = 'grey50', linetype = 'dashed') +
      geom_histogram(binwidth = 2, col="white", fill = "black", alpha=1) + theme_light() + 
      labs(x="Systolic Blood Pressure", y="N Patients") + scale_y_continuous(labels = comma) + xlim(80,200) + 
      theme(axis.text.x = element_text(color = "black", size = 8)) + theme(plot.title = element_text(size=8))) 
  
  (grid <- sys + facet_wrap(id ~. , ncol=3, scales='free'))
  ggsave(grid, file = paste0(leaf,"hist_by_clinic.pdf"), width = 9, height =5.5)   
  
  rm(c)
  
# (FIG 2) ------------------
  c <- hd
  
  # clinics with few zeros 
  bunch <- bunchit(z_vector = c[c$cid5==7 & c$low_rounding05==1,]$pa_sys, zstar = 140, binwidth = 5, bins_l = 8, bins_r = 8,
                   poly = 5, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = F, seed = 69, rn = c(10),
                   p_title = "",
                   p_xtitle = "Systolic Blood Pressure", p_ytitle = "Visits", 
                   p_zstar_color = "white", p_cf_color = "grey70", p_cf_size=1.5, p_freq_size = 0.8, p_freq_msize = 1.5)
  (bpD <- bunch$plot + theme_light() + theme(legend.position = "none") + 
      geom_vline(xintercept=140, color = 'grey50', linetype = 'dashed'))

  
  # clinics with many zeros 
  bunch <- bunchit(z_vector = c[c$cid5==101 & c$low_rounding05==0,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                   poly = 3, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = F, seed = 69, rn = c(10),
                   p_xtitle = "Systolic Blood Pressure", p_ytitle = "Visits",
                   p_zstar_color = "white", p_cf_color = "grey70", p_cf_size=1.5, p_freq_size = 0.8, p_freq_msize = 1.5)
  (bpF <- bunch$plot + theme_light() + theme(legend.position = "none") + 
      geom_vline(xintercept=140, color = 'grey50', linetype = 'dashed'))

   (bunches <- ggpubr::ggarrange(bpD, bpF, ncol = 2, nrow = 1))
  ggsave(bunches, file = paste0(leaf,"bunches_two_clinics.pdf"), width = 7.1, height = 3.5)
  

  
# sample exclusions ---------------------------------------------
  hd %<>% dplyr::filter(age_at_visit<80 & age_at_visit>=18)
  # for table A1
  length(unique(hd$id))                             
  length(unique(hd$cod_est))                        
  
  hd %<>% dplyr::filter(followup_yrs>=1)  
  # for table A1
  length(unique(hd$id))                             
  length(unique(hd$cod_est))   

  
# save data 18-80 ----------------------------------------------- 
  save(hd, file = paste0(gen_data,"primary_hospital_cleaned_included.Rda"))

  # load ENS data
  ens <- read.csv(paste0(gen_data,'ens_2016_analysis.csv'))
  
  # save IDs of included patients 
  hdi <- hd
  hdi %<>% dplyr::select(id, sig_neg_bunch, sig_neg_bunch_mag, cod_est)
  save(hdi, file = paste0(gen_data,"included_ids.Rda"))
#----------------------------------------------------------------#    
#### MAIN RESULTS ####
# (FIG 1A, 1B) ------------------------
  
  # national health survey (ENS) systolic bp
  (ens_sys <- ggplot(data=ens, aes(pa_sys)) + geom_vline(xintercept=140, color = 'black', linetype = 'dashed') +
     geom_histogram(binwidth = 1, col="white", fill = "black", alpha=1) + theme_light() + 
     labs(x="Systolic Blood Pressure", y="N Individuals") + scale_y_continuous(labels = comma) +
     scale_x_continuous(breaks=c(80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200)) + xlim(80,200) + 
     theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15))) 
  
  # electronic health records systolic bp
  (ehr_sys <- ggplot(data=hd, aes(pa_sys)) + geom_vline(xintercept=140, color = 'black', linetype = 'dashed') +
      geom_histogram(binwidth = 1, col="white", fill = "black", alpha=1) + theme_light() + 
      labs(x="Systolic Blood Pressure", y="N Individuals") + scale_y_continuous(labels = comma) +
      scale_x_continuous(breaks=c(80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200)) + xlim(80,200) + 
      theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15))) 
  
  sys_both <- ggpubr::ggarrange(ens_sys, ehr_sys, ncol = 2, nrow = 1)
  ggsave(sys_both, file = paste0(leaf,"healthsurvey_ehr_sys.pdf"), width = 8, height = 3.5)
  
# (FIG 3) --------------------------   
  hdc <- hd
  hdc$rows_clinic <- NULL
  hdc %<>% dplyr::group_by(cod_est) %<>% dplyr::mutate(rows_clinic = row_number()) %<>% ungroup()
  hdc %<>% dplyr::filter(rows_clinic==1)
  
  est <- felm(sig_neg_bunch_mag ~ income  | year + quarter  | 0 | cod_est, data = hdc)
  coefs = as.data.frame(summary(est)$coefficients)
  coefs <- coefs[, 1:2]
  coefs$vars = rownames(coefs)
  
  est <- felm(sig_neg_bunch_mag ~ log_visits  | year + quarter  | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(sig_neg_bunch_mag ~ mean_age  | year + quarter  | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(sig_neg_bunch_mag ~ tertiary_edu  | year + quarter  | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(sig_neg_bunch_mag ~ secondary_or_less  | year | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(sig_neg_bunch_mag ~  type_consrural  | year + quarter  | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(sig_neg_bunch_mag ~  type_consurb  | year + quarter  | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(sig_neg_bunch_mag ~ type_hospbaja  | year + quarter  | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(sig_neg_bunch_mag ~ rural_mean  | year + quarter  | 0 | cod_est, data = hdc)
  coefs1 = as.data.frame(summary(est)$coefficients)
  coefs1 <- coefs1[, 1:2]
  coefs1$vars = rownames(coefs1)
  
  coefs <- rbind(coefs, coefs1)
  
  coefs$vars[coefs$vars=="income"] <- "Municipality mean household income"
  coefs$vars[coefs$vars=="log_visits"] <- "Log total visits at clinic"
  coefs$vars[coefs$vars=="mean_age"] <- "Municipality mean age"
  coefs$vars[coefs$vars=="tertiary_edu"] <- "% Population with tertiary education"
  coefs$vars[coefs$vars=="secondary_or_less"] <- "% Population with secondary or less education"
  coefs$vars[coefs$vars=="type_consrural"] <- "Clinc type: Rural primary care clinic"
  coefs$vars[coefs$vars=="type_consurb"] <- "Clinc type: Urban primary care clinic"
  coefs$vars[coefs$vars=="type_hospbaja"] <- "Clinc type: Low complexity hospital"
  coefs$vars[coefs$vars=="rural_mean"] <- "% Municipality that is rural"
  
  
  (bal_clinic <- ggplot(coefs, aes(Estimate, vars)) + 
      geom_vline(xintercept=0, lty=2,  colour="grey50")  +
      geom_errorbarh(aes(xmin=Estimate - 1.96*`Cluster s.e.`, xmax=Estimate + 1.96*`Cluster s.e.`), 
                     colour="grey40", height=0) + 
      geom_point(size=2, color="grey40", shape=15) +  xlab("Estimate with 95% confidence intervals") +
      scale_x_continuous(breaks = seq(-0.1, 0.1, by = 0.02)) + xlim(c(-3, 3)) + 
      ylab("") + theme_light() )


  est <- felm(sig_neg_bunch_mag ~ male  | year + quarter | 0 | cod_est, data = hd)
  coefs_p = as.data.frame(summary(est)$coefficients)
  coefs_p <- coefs_p[, 1:2]
  coefs_p$vars = rownames(coefs_p)
  
  est <- felm(sig_neg_bunch_mag ~ age_at_visit  |year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ risk_hi  | year + quarter   | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ risk_med  | year + quarter | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ risk_low  | year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~  diag_dm10  | year + quarter   | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~  diag_dislip10  | year +quarter | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ bmi_sobese  | year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ bmi_over_ob  | year + quarter | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ bmi_norm_under  | year + month    | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ sedentarismo  | year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  hd$fonasa_a <- ifelse(is.na(hd$fonasa_a), 0, hd$fonasa_a)  
  est <- felm(sig_neg_bunch_mag ~ fonasa_a  | year +quarter   | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ fonasa_b | year + quarter    | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ fonasa_c  | year + quarter   | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ fonasa_d | year + quarter    | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ chol1 | year +quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ chol2 | year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ chol3 | year +quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  
  coefs_p$vars[coefs_p$vars=="male"] <-   "Male"         
  coefs_p$vars[coefs_p$vars=="age_at_visit"] <-  "Age (years)"
  coefs_p$vars[coefs_p$vars=="risk_hi"] <- "Cardiovascular risk: high"  
  coefs_p$vars[coefs_p$vars=="risk_med"] <- "Cardiovascular risk: moderate"
  coefs_p$vars[coefs_p$vars=="risk_low"] <- "Cardiovascular risk: low"
  coefs_p$vars[coefs_p$vars=="diag_dm10"] <- "Diagnosed type 2 diabetes"
  coefs_p$vars[coefs_p$vars=="diag_dislip10"] <- "Diagnosed dyslipidemia"
  coefs_p$vars[coefs_p$vars=="bmi_sobese"] <- "BMI: Severely obese"
  coefs_p$vars[coefs_p$vars=="bmi_over_ob"] <- "BMI: Overweight or obese"
  coefs_p$vars[coefs_p$vars=="bmi_norm_under"] <- "BMI: Normal or underweight"
  coefs_p$vars[coefs_p$vars=="sedentarismo"] <- "Sedentary"
  coefs_p$vars[coefs_p$vars=="fonasa_a"] <- "Monthly income: below poverty line"  
  coefs_p$vars[coefs_p$vars=="fonasa_b"] <- "Monthly income < $320 or pensioner"        
  coefs_p$vars[coefs_p$vars=="fonasa_c"] <- "Monthly income $320-465"  
  coefs_p$vars[coefs_p$vars=="fonasa_d"] <- "Monthly income > $465"   
  coefs_p$vars[coefs_p$vars=="chol1"] <- "High cholesterol: controlled"      
  coefs_p$vars[coefs_p$vars=="chol2"] <- "High cholesterol: moderate"       
  coefs_p$vars[coefs_p$vars=="chol3"] <- "High cholesterol: severe"
  coefs_p$vars[coefs_p$vars=="age_20s"] <- "Age 18-29"
  coefs_p$vars[coefs_p$vars=="age_30s"] <- "Age 30-39"
  coefs_p$vars[coefs_p$vars=="age_40s"] <-  "Age 40-49"
  coefs_p$vars[coefs_p$vars=="age_50s"] <-  "Age 50-59"
  coefs_p$vars[coefs_p$vars=="age_60s"] <- "Age 60-69"     
  coefs_p$vars[coefs_p$vars=="age_70s"] <- "Age 70-79"
  
  (bal_patient <- ggplot(coefs_p, aes(Estimate, vars)) + 
      geom_vline(xintercept=0, lty=2,  colour="grey50")  +
      geom_errorbarh(aes(xmin=Estimate - 1.96*`Cluster s.e.`, xmax=Estimate + 1.96*`Cluster s.e.`), 
                     colour="grey40", height=0) +
      geom_point(size=2, shape=15, color="grey40") +  
      scale_x_continuous(breaks = seq(-0.5, 0.5, by = 0.02)) + xlim(c(-0.25, 0.25)) + ylab("") +
      xlab("Estimate with 95% confidence intervals") + theme_light() )

  (bal <- ggarrange(bal_patient, bal_clinic, 
                    labels = c("A", 
                               "B"),
                    ncol = 1,  heights = c(1.5, 0.9)))
  ggsave(bal, file = paste0(leaf,"patient_mun_balance.pdf"), width = 6, height = 7)
  
# (FIG 4) ----------
  c <- hd
  c %<>% dplyr::group_by(cod_est) %<>% dplyr::mutate(rows_clinic=row_number()) %<>% ungroup()
  c %<>% dplyr::filter(rows_clinic==1)
  length(unique(c$cod_est))
  
  # hist of b estimates - among clinics with non zero bunching
  (b <- ggplot(data=c[c$sig_neg_bunch==1,], aes(b_10)) + 
      geom_histogram( col="white", fill = "black", alpha=0.7) +
      ggtitle("") + theme_light() +
      labs(x="Normalized excess mass", y="N Clinics") +
      theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15))) 
  ggsave(b, file = paste0(leaf,"bunching_magnitude.pdf"), width = 5, height = 4)

# (TAB 1) -----------------------------  
  
  sum <- hd
  sum %<>% dplyr::select(female, age_at_visit, age_20s,
                         age_30s, age_40s, age_50s, age_60s, age_70s,
                         bmi_w,  bmi_norm_under, bmi_over, bmi_obese, bmi_sobese,
                         diag_dm10, diag_dislip10, 
                         high_cholesterol,
                         risk_hi, risk_med, risk_low,
                         sedentarismo, fonasa_a, fonasa_b, fonasa_c, fonasa_d)
  
  sum <- rename(sum, Female = female)
  sum <- rename(sum, `Mean age` = age_at_visit )
  sum <- rename(sum, `Type 2 Diabetes` = diag_dm10 )
  sum <- rename(sum, `Dyslipidemia` = diag_dislip10 )
  sum <- rename(sum, `Body mass index`= bmi_w )            
  sum <- rename(sum, `Severely obese BMI`= bmi_sobese) 
  sum <- rename(sum, `Overweight BMI` = bmi_over)  
  sum <- rename(sum, `Normal or underweight BMI`= bmi_norm_under )  
  sum <- rename(sum, `Obese BMI`= bmi_obese )  
  sum <- rename(sum, `High cholesterol`= high_cholesterol )
  sum <- rename(sum, Sedentary= sedentarismo )
  sum <- rename(sum, `Monthly income: below poverty line` = fonasa_a )
  sum <- rename(sum, `Monthly income < $320 or pensioner`= fonasa_b )
  sum <- rename(sum, `Monthly income $320-465`= fonasa_c )
  sum <- rename(sum, `Monthly income > $465`= fonasa_d)
  sum <- rename(sum, `Cardiovascular risk: high`  = risk_hi)
  sum <- rename(sum, `Cardiovascular risk: moderate` = risk_med) 
  sum <- rename(sum, `Cardiovascular risk: low` = risk_low)
  sum <- rename(sum, `Age 18-29 ` = age_20s)
  sum <- rename(sum, `Age 30-39` = age_30s)
  sum <- rename(sum, `Age 40-49` = age_40s)
  sum <- rename(sum, `Age 50-59` = age_50s)
  sum <- rename(sum, `Age 60-69` = age_60s)
  sum <- rename(sum, `Age 70-79` = age_70s)
  
  
  
  
  sum <- as.data.frame(sum)
  stargazer(sum, flip=F, table.layout ="-d#-t-sna-",
            summary.stat = c("mean", "sd"),
            title="Summary Statistics",
            out = paste0(leaf, "sum_stats_patient.tex"))
  

  mum <- hd
  mum %<>% dplyr::group_by(cod_est) %<>% dplyr::mutate(rows_clinic = row_number()) %<>% ungroup()
  mum %<>% dplyr::filter(rows_clinic==1)
  mum %<>% dplyr::select(ypch_usd2021, log_visits, mean_age, secondary_or_less, rural_mean,
                         tertiary, type_consurb , type_consrural, type_hospbaja,
                         e1_1_mean, activ_1_mean)
  
  mum <- rename(mum, `Household income per capita`= ypch_usd2021)
  mum <- rename(mum, `Mean municipality age`= mean_age)
  mum <- rename(mum, `% of municipality that is rural`= rural_mean)
  mum <- rename(mum, `Literacy rate in municipality`= e1_1_mean)
  mum <- rename(mum, `Employment rate in municipality`= activ_1_mean)
  mum <- rename(mum, `Secondary or primary education`= secondary_or_less)
  mum <- rename(mum, `Tertiary education`= tertiary)
  mum <- rename(mum, `Urban primary care clinic`= type_consurb)
  mum <- rename(mum, `Rural primary care clinic`= type_consrural)
  mum <- rename(mum, `Low complexity hospital`= type_hospbaja)
  mum <- rename(mum, `Log visits per clinic`= log_visits)
  
  
  mum <- as.data.frame(mum)
  stargazer(mum, flip=F, table.layout ="-d#-t-sna-",
            summary.stat = c("mean", "sd"),
            out = paste0(leaf, "sum_stats_clinic.tex"))
  
  
# (TAB 2,3,4) ---------------------
  
  # functions:
  mean1 <- function(x) {
    meantest <- round(mean(hd[[paste0(x)]], na.rm=T),3)
  }
  clinics1 <- function(x) {
    # Check if the column exists and has non-NA values
    if (any(!is.na(hd[[paste0(x)]]))) {
      # Calculate the number of unique values in the "cod_est" column
      clinictest <- length(unique(hd[["cod_est"]]))
      return(clinictest) # Ensure the result is returned
    } else {
      return(NA) # Return NA if all values are NA or column doesn't exist
    }
  }
  
  
  ## Table 2 - Diag and Prescription 
  outcomes <- c('diag_htn10', 'med_htn_ed')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Hypertension Diagnosis and Prescription", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_guidelines",
            dep.var.labels= c('\\shortstack{Hypertension \\\\ Diagnosis}', '\\shortstack{Hypertension \\\\ Prescription}'),
            omit.table.layout = "n", font.size = "small", 
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_guidelines.tex"))

  
  ## Table 3 - CV hospitalizations per 100 patients 
  hd$cv_stroke_90_h <- hd$cv_stroke_90*100
  hd$cv_acs_90_h <- hd$cv_acs_90 *100
  hd$cv_chf_90_h <- hd$cv_chf_90 *100
  hd$cv_stroke_180_h <- hd$cv_stroke_180*100
  hd$cv_acs_180_h <- hd$cv_acs_180 *100
  hd$cv_chf_180_h <- hd$cv_chf_180 *100
  hd$cv_stroke_365_h <- hd$cv_stroke_365*100
  hd$cv_acs_365_h <- hd$cv_acs_365 *100
  hd$cv_chf_365_h <- hd$cv_chf_365 *100
  
  outcomes <- c( 'cv_stroke_90_h', 'cv_stroke_180_h', 'cv_stroke_365_h',
                 'cv_acs_90_h', 'cv_acs_180_h', 'cv_acs_365_h',
                 'cv_chf_90_h',  'cv_chf_180_h', 'cv_chf_365_h')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Cardiovascular Hospitalization 3, 6, and 12 Months After Primary Care Visit", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_cv",
            dep.var.labels= c('$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.', 
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.',
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.'),
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            omit.table.layout = "n", font.size = "small",
            out = paste0(leaf, "results_cv.tex"))

  
  ## Table 4 - Heuristics 
  outcomes <- c('male', 'bmi_sobese', 'over55', 'over65')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter    | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Selection by Patient Characteristics Representative of High Cardiovascular Risk", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-",
            dep.var.labels= c('Male',  '\\shortstack{Severe\\\\Obesity}', 'Age 55+', 'Age 65+'),
            omit.table.layout = "n", font.size = "small", label = "tab:results_heuristics",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_heuristics.tex"))
  
  
  
  
# (FIG 5, TAB A8, TAB A9) -----------------
  

  hd$height_measured <- ifelse(!is.na(hd$talla), 1, 0)
  hd %<>% dplyr::mutate(talla = ifelse(talla<2, talla*100, talla))
  cut_point_top <- quantile(hd$talla, 0.99, na.rm = T)
  cut_point_bottom <- quantile(hd$talla, 0.01, na.rm = T)
  hd %<>% mutate(outlier_top_talla = (talla >= cut_point_top))
  hd %<>% mutate(outlier_bottom_talla = (talla <= cut_point_bottom))
  hd$talla_w <- ifelse(hd$outlier_top_talla, cut_point_top, hd$talla)
  hd$talla_w <- ifelse(hd$outlier_bottom_talla, cut_point_bottom, hd$talla)
  hist(hd$talla_w)
  hd %<>% dplyr::mutate(height = ifelse((is.na(talla) | talla==0), mean(talla, na.rm=T), talla))
  hist(hd$height, breaks=100)
  hd$height_log <- log(hd$height)
  hd$hba1c_gluc_test <- ifelse((!is.na(hd$hba1c) | !is.na(hd$glicemia_mgdl)), 1, 0)
  hd$chol_measured <- ifelse((!is.na(hd$trigliceridos_mgdl) | !is.na(hd$colesterol_total_mgdl) | !is.na(hd$hdl_mgdl)), 1, 0)
  hd$weight_measured <- ifelse(!is.na(hd$peso), 1, 0)
  hd$peso_imp <- ifelse((is.na(hd$peso) | hd$peso==0), mean(hd$peso, na.rm=T), hd$peso)
  hd$log_weight <- log(hd$peso_imp)

  outcomes <- c('hba1c_gluc_test', 'chol_measured', 'weight_measured', 
                'height_measured', 'log_weight', 'height_log', 
                'diag_dislip10', 'diag_dm10')
  
  # Placebo table:
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, "~ sig_neg_bunch_mag*under140   | year + quarter  | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Placebo Outcomes: Clinical Decisions and Biomarkers", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_placebo2",
            omit.table.layout = "n", font.size = "small", 
            dep.var.labels= c('Blood glucose', 'Cholesterol', 'Weight', 'Height', 'Log weight', 'Log height', 'Dyslipidemia', 'Type 2 diabetes'),
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_placebo_new.tex"))
  
  
  ## Placebo Outcomes 
  outcomes <- c( 'any_nohtn_90', 'any_nohtn_180', 'any_nohtn_365')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter + male + age_round  | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  dd <- hd
  dd %<>% dplyr::filter(diag_dm10==1)
  
  outcomes <- c( 'dm_hosp_90', 'dm_hosp_180', 'dm_hosp_365')
  
  meandd <- function(x) {
    meantest <- round(mean(dd[[paste0(x)]], na.rm=T),3)
  }
  clinicsdd <- function(x) {
    if (!all(is.na(dd[[paste0(x)]]))) {  # Check if not all values are NA
      clinictest <- length(unique(dd[["cod_est"]]))
      return(clinictest)                # Explicitly return the value
    } else {
      return(NA)                        # Return NA if all values are NA
    }
  }
  meandd <- as.character(lapply(outcomes, meandd))
  clinicdd <- as.character(lapply(outcomes, clinicsdd))
  
  reglist2 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = dd, exactDOF=TRUE)
  })
  
  
  reglist1[[length(reglist1) + 1]] <- reglist2
  meandf <- cbind(meandf, meandd)
  clinicdf <- cbind(clinicdf, clinicdd)
  
  stargazer(reglist1, title="Impact of Discretion on Placebo Outcomes: Hospitalizations 3, 6, and 12 Months After Primary Care Visit", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_all_dm",
            dep.var.labels= c('$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.', 
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.'),
            omit.table.layout = "n", font.size = "small",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_all_dm.tex"))
  
  outcomes <- c('hba1c_gluc_test', 'chol_measured', 'weight_measured', 
                'height_measured', 'log_weight', 'height_log', 
                'diag_dislip10', 'diag_dm10', 
                'any_nohtn_90', 'any_nohtn_180', 'any_nohtn_365', 'dm_hosp_90', 'dm_hosp_180', 'dm_hosp_365')
  
  coefs <- data.frame()
  for (outcome in outcomes) {
    est <- felm(as.formula(paste0(outcome, " ~ ", "sig_neg_bunch_mag*under140 | year + quarter + male + age_round | 0 | cod_est")), data = hd)
    
    coef_df <- as.data.frame(summary(est)$coefficients)[3, 1:2, drop = FALSE] 
    coef_df$depvar <- outcome
    
    coefs <- rbind(coefs, coef_df)
  }
  rownames(coefs) <- NULL
  rm(coef_df)
  
  coefs$var <- NA
  coefs[coefs$depvar=="hba1c_gluc_test",]$var <- "Blood glucose measured"
  coefs[coefs$depvar=="chol_measured",]$var <- "Cholesterol measured"
  coefs[coefs$depvar=="weight_measured",]$var <- "Weight measured"
  coefs[coefs$depvar=="height_measured",]$var <- "Height measured"
  coefs[coefs$depvar=="log_weight",]$var <- "Log weight (kg)"
  coefs[coefs$depvar=="height_log",]$var <- "Log height (cm)"
  coefs[coefs$depvar=="diag_dislip10",]$var <- "Dyslipidemia diagnosis"
  coefs[coefs$depvar=="diag_dm10",]$var <- "Type 2 diabetes diagnosis"
  coefs[coefs$depvar=="any_nohtn_90",]$var <-  "Non-cardiovascular hospitalization within 90 days"
  coefs[coefs$depvar=="any_nohtn_180",]$var <- "Non-cardiovascular hospitalization within 180 days"
  coefs[coefs$depvar=="any_nohtn_365",]$var <- "Non-cardiovascular hospitalization within 365 days"
  coefs[coefs$depvar=="dm_hosp_90",]$var <- "Diabetes hospitalization within 90 days*"
  coefs[coefs$depvar=="dm_hosp_180",]$var <- "Diabetes hospitalization within 180 days*"
  coefs[coefs$depvar=="dm_hosp_365",]$var <- "Diabetes hospitalization within 365 days*"
  
  coefs$var <- as.character(coefs$var)
  coefs$var <- factor(coefs$var, levels=c(
    "Diabetes hospitalization within 365 days*",
    "Diabetes hospitalization within 180 days*",
    "Diabetes hospitalization within 90 days*",
    "Non-cardiovascular hospitalization within 365 days",
    "Non-cardiovascular hospitalization within 180 days",
    "Non-cardiovascular hospitalization within 90 days",
    "Type 2 diabetes diagnosis",
    "Dyslipidemia diagnosis",
    "Log height (cm)",
    "Log weight (kg)",
    "Height measured",
    "Weight measured",
    "Cholesterol measured",
    "Blood glucose measured"
  ))
  
  (placebo <- ggplot(coefs, aes(y=var, x=Estimate)) + 
      geom_vline(xintercept=0, lty=1,  colour="grey70")  +
      geom_errorbar(aes(xmin=Estimate - 1.96*`Cluster s.e.`, xmax=Estimate + 1.96*`Cluster s.e.`), width=0, color='black') + 
      geom_point(color='black') + xlim(-0.015, 0.015) + 
      xlab("Difference-in-differences estimate with 95% CI") + ylab("") + theme_light() )
  
  (placebo1 <- placebo + guides(color=guide_legend(nrow=2, byrow=F, title.position="top")))  
  ggsave(placebo1, file = paste0(leaf,"results_placebo_outcomes.pdf"), width = 6.5, height = 4)
  
  
# (FIG 6) ------
  
  hdh10 <- hd
  hdh10 %<>% dplyr::filter(sig_neg_bunch==1)
  
  out_highround_10 <- NA
  out_highround_10 <- data.frame(matrix(nrow = 2, ncol = 11))
  colnames(out_highround_10) <- c("characteristic", "B_10", "B_sd_10", "b_10", "b_sd_10", "e_10", "mean_marg_10", "sd_marg_10", "n", "n_bin", "group")
  
  ## bunching estimator - among males
  bunch <- bunchit(z_vector = hdh10[hdh10$male==1,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                   poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = TRUE, seed = 69, rn = c(10), 
                   p_title = paste0("Notch (high rounding to zero clinics) Males"),
                   p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                   p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")

  out_highround_10[1,1] <- "Male"
  out_highround_10[1, 2] <- bunch$B
  out_highround_10[1, 3] <- bunch$B_sd
  out_highround_10[1, 4] <- bunch$b
  out_highround_10[1, 5] <- bunch$b_sd
  out_highround_10[1, 6] <- bunch$e  
  out_highround_10[1, 7] <- mean(bunch$marginal_buncher_vector)    
  out_highround_10[1, 8] <- bunch$marginal_buncher_sd
  out_highround_10[1, 9] <- nrow(hdh10[hdh10$male==1,])
  out_highround_10[1, 10] <- nrow(hdh10[hdh10$male==1 & (hdh10$pa_sys==140 | hdh10$pa_sys==141),])
  out_highround_10[1, 11] <- "Sex"
  
  
  ## bunching estimator - among females
  bunch <- bunchit(z_vector = hdh10[hdh10$male==0,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                   poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = TRUE, seed = 69, rn = c(10), 
                   p_title = paste0("Notch (high rounding to zero clinics) Females"),
                   p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                   p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
  
  out_highround_10[2,1] <- "Female"
  out_highround_10[2, 2] <- bunch$B
  out_highround_10[2, 3] <- bunch$B_sd
  out_highround_10[2, 4] <- bunch$b
  out_highround_10[2, 5] <- bunch$b_sd
  out_highround_10[2, 6] <- bunch$e  
  out_highround_10[2, 7] <- mean(bunch$marginal_buncher_vector)    
  out_highround_10[2, 8] <- bunch$marginal_buncher_sd
  out_highround_10[2, 9] <- nrow(hdh10[hdh10$male==0,])
  out_highround_10[2, 10] <- nrow(hdh10[hdh10$male==0 & (hdh10$pa_sys==140 | hdh10$pa_sys==141),])
  out_highround_10[2, 11] <- "Sex"
  
  ## bunching estimator - among over55
  bunch <- bunchit(z_vector = hdh10[hdh10$over55==1,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                   poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = TRUE, seed = 69, rn = c(10), 
                   p_title = paste0("Notch (high rounding to zero clinics) Over55"),
                   p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                   p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
  

  out_highround_10[3,1] <- "Age 55+"
  out_highround_10[3, 2] <- bunch$B
  out_highround_10[3, 3] <- bunch$B_sd
  out_highround_10[3, 4] <- bunch$b
  out_highround_10[3, 5] <- bunch$b_sd
  out_highround_10[3, 6] <- bunch$e  
  out_highround_10[3, 7] <- mean(bunch$marginal_buncher_vector)    
  out_highround_10[3, 8] <- bunch$marginal_buncher_sd
  out_highround_10[3, 9] <- nrow(hdh10[hdh10$over55==1,])
  out_highround_10[3, 10] <- nrow(hdh10[hdh10$over55==1 & (hdh10$pa_sys==140 | hdh10$pa_sys==141),])
  out_highround_10[3, 11] <- "Age 55"
  
  ## bunching estimator - among under55
  bunch <- bunchit(z_vector = hdh10[hdh10$over55==0,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                   poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = TRUE, seed = 69, rn = c(10), 
                   p_title = paste0("Notch (high rounding to zero clinics) Under55"),
                   p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                   p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
  
  out_highround_10[4,1] <- "Age <55"
  out_highround_10[4, 2] <- bunch$B
  out_highround_10[4, 3] <- bunch$B_sd
  out_highround_10[4, 4] <- bunch$b
  out_highround_10[4, 5] <- bunch$b_sd
  out_highround_10[4, 6] <- bunch$e  
  out_highround_10[4, 7] <- mean(bunch$marginal_buncher_vector)    
  out_highround_10[4, 8] <- bunch$marginal_buncher_sd
  out_highround_10[4, 9] <- nrow(hdh10[hdh10$over55==0,])
  out_highround_10[4, 10] <- nrow(hdh10[hdh10$over55==0 & (hdh10$pa_sys==140 | hdh10$pa_sys==141),])
  out_highround_10[4, 11] <- "Age 55"
  
  ## bunching estimator - among over65
  bunch <- bunchit(z_vector = hdh10[hdh10$over65==1,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                   poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = TRUE, seed = 69, rn = c(10), 
                   p_title = paste0("Notch (high rounding to zero clinics) Over65"),
                   p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                   p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
  
  out_highround_10[5,1] <- "Age 65+"
  out_highround_10[5, 2] <- bunch$B
  out_highround_10[5, 3] <- bunch$B_sd
  out_highround_10[5, 4] <- bunch$b
  out_highround_10[5, 5] <- bunch$b_sd
  out_highround_10[5, 6] <- bunch$e  
  out_highround_10[5, 7] <- mean(bunch$marginal_buncher_vector)    
  out_highround_10[5, 8] <- bunch$marginal_buncher_sd
  out_highround_10[5, 9] <- nrow(hdh10[hdh10$over65==1,])
  out_highround_10[5, 10] <- nrow(hdh10[hdh10$over65==1 & (hdh10$pa_sys==140 | hdh10$pa_sys==141),])
  out_highround_10[5, 11] <- "Age 65"
  
  ## bunching estimator - among under65
  bunch <- bunchit(z_vector = hdh10[hdh10$over65==0,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                   poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                   p_b = TRUE, seed = 69, rn = c(10), 
                   p_title = paste0("Notch (high rounding to zero clinics) Under65"),
                   p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                   p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")

  # save results
  out_highround_10[6,1] <- "Age <65"
  out_highround_10[6, 2] <- bunch$B
  out_highround_10[6, 3] <- bunch$B_sd
  out_highround_10[6, 4] <- bunch$b
  out_highround_10[6, 5] <- bunch$b_sd
  out_highround_10[6, 6] <- bunch$e  
  out_highround_10[6, 7] <- mean(bunch$marginal_buncher_vector)    
  out_highround_10[6, 8] <- bunch$marginal_buncher_sd
  out_highround_10[6, 9] <- nrow(hdh10[hdh10$over65==0,])
  out_highround_10[6, 10] <- nrow(hdh10[hdh10$over65==0 & (hdh10$pa_sys==140 | hdh10$pa_sys==141),])
  out_highround_10[6, 11] <- "Age 65"
  
  
  out_highround_10$b_t_10 <- out_highround_10$b_10 / out_highround_10$b_sd_10
  out_highround_10$abs_b_t_10 <- abs(out_highround_10$b_t_10)
  out_highround_10$sig05 <- ifelse(out_highround_10$abs_b_t_10>=1.96, 1, 0)
  save(out_highround_10, file = paste0(gen_data,"out_highround_10_posbunch_males_over65.Rda"))
  
  out_highround_10 %<>%
    mutate(characteristic = fct_relevel(characteristic, "Age <55", "Age 55+", "Age <65", "Age 65+", "Female", "Male"))

  group_comparisons <- list(
    c("Male", "Female"),
    c("Age <55", "Age 55+"),
    c("Age <65", "Age 65+")
  )
  
  compute_p_value <- function(group1, group2, df) {
    row1 <- df %>% filter(characteristic == group1)
    row2 <- df %>% filter(characteristic == group2)
  
    diff <- row1$B_10 - row2$B_10
    pooled_se <- sqrt(row1$B_sd_10^2 + row2$B_sd_10^2)
  
    t_stat <- diff / pooled_se
    p_value <- 2 * (1 - pnorm(abs(t_stat)))
    
    return(data.frame(Comparison = paste(group1, "vs", group2), t_stat = t_stat, p_value = p_value))
  }
  
  comparison_results <- do.call(rbind, lapply(group_comparisons, function(g) compute_p_value(g[1], g[2], out_highround_10)))
  
  (b <- ggplot(out_highround_10) +
      geom_bar( aes(x=characteristic, y=b_10, fill=group), stat="identity",  alpha=0.7) +
      geom_errorbar( aes(x=characteristic, ymin=b_10-b_sd_10, ymax=b_10+b_sd_10), width=0, colour="black", alpha=0.9, linewidth=0.5) +
      labs(x = "", y = "Normalized excess mass") +
      theme_bw() + ylim(c(-0.6, 0)) + theme(legend.position = "none") + 
      scale_fill_manual(values = c("black", "grey40", "grey70")) +
      theme(axis.text.x = element_text( color="black", size=12), 
            axis.text.y = element_text(color="black", size=12))
  )
  ggsave(b, file = paste0(leaf,"bunch_mag_heuristics.pdf"), width = 6, height = 4.5) 
  
  

  
 
  
  
#### APPENDIX #####  
# (TAB A2) ---------------
  
  # reason for being in cardiovascular program
  hd$pscv_htn <- grepl("1", hd$diag_cv, fixed=TRUE)
  hd$pscv_dm <- grepl("2", hd$diag_cv, fixed=TRUE)
  hd$pscv_dislip <- grepl("3", hd$diag_cv, fixed=TRUE)
  
  length(unique(hd[is.na(hd$diag_cv),]$id)) 
  
  hd %<>% dplyr::mutate(pscv_smoke = ifelse(is.na(diag_cv) & age_at_visit>=55 & tabaco==1, 1, 0))
  hd %<>% dplyr::mutate(pscv_history = ifelse(is.na(diag_cv) &  (edema==1|iam==1|retinopatia==1|neuropatia==1|hipertrofia_v_izq==1|
                                                                   erc==1|ant_fliar_cardiopatia==1|ant_lipidos_genetico==1) , 1, 0))
  hd %<>% dplyr::mutate(pscv_unknown = ifelse(is.na(diag_cv) & 
                                                (pscv_htn==0 & pscv_dm==0 & pscv_dislip==0 & 
                                                   pscv_smoke==0 & pscv_history==0), 1, 0))

  hd %<>% dplyr::mutate(pscv_smoke_a = ifelse(age_at_visit>=55 & tabaco==1, 1, 0))
  hd %<>% dplyr::mutate(pscv_history_a = ifelse((edema==1|iam==1|retinopatia==1|neuropatia==1|hipertrofia_v_izq==1|
                                                   erc==1|ant_fliar_cardiopatia==1|ant_lipidos_genetico==1) , 1, 0))
  hd %<>% dplyr::mutate(pscv_unknown_a = ifelse((pscv_htn==0 & pscv_dm==0 & pscv_dislip==0 & 
                                                   pscv_smoke_a==0 & pscv_history_a==0), 1, 0))
  
  hd %<>% dplyr::mutate(pscv_htn = ifelse(is.na(pscv_htn), 0, pscv_htn))
  hd %<>% dplyr::mutate(pscv_dm = ifelse(is.na(pscv_dm), 0, pscv_dm))
  hd %<>% dplyr::mutate(pscv_dislip = ifelse(is.na(pscv_dislip), 0, pscv_dislip))
  hd %<>% dplyr::mutate(pscv_smoke = ifelse(is.na(pscv_smoke), 0, pscv_smoke))
  hd %<>% dplyr::mutate(pscv_history = ifelse(is.na(pscv_history), 0, pscv_history))
  hd %<>% dplyr::mutate(pscv_unknown = ifelse(is.na(pscv_unknown), 0, pscv_unknown))
  hd %<>% dplyr::mutate(pscv_smoke_a = ifelse(is.na(pscv_smoke_a), 0, pscv_smoke_a))
  hd %<>% dplyr::mutate(pscv_history_a = ifelse(is.na(pscv_history_a), 0, pscv_history_a))
  hd %<>% dplyr::mutate(pscv_unknown_a = ifelse(is.na(pscv_unknown_a), 0, pscv_unknown_a))
  
  c <- hd
  c %<>% dplyr::mutate(pscv_smoke_a = ifelse(pscv_smoke_a==1, mean(pscv_smoke_a)*100, NA))
  c %<>% dplyr::mutate(pscv_dm = ifelse(pscv_dm==1, mean(pscv_dm)*100, NA))
  c %<>% dplyr::mutate(pscv_dislip = ifelse(pscv_dislip==1, mean(pscv_dislip)*100, NA))
  c %<>% dplyr::mutate(pscv_history_a = ifelse(pscv_history_a==1, mean(pscv_history_a)*100, NA))
  c %<>% dplyr::mutate(pscv_unknown = ifelse(pscv_unknown==1, mean(pscv_unknown)*100, NA))
  
  c$`Smoker and over 55` <- c$pscv_smoke_a
  c$`Type 2 Diabetes` <- c$pscv_dm
  c$`Dyslipidemia` <- c$pscv_dislip
  c$`History of CVD` <- c$pscv_history_a
  c$`Unknown` <- c$pscv_unknown
  
  c %<>% dplyr::select(`Smoker and over 55`, `Type 2 Diabetes`, 
                       `Dyslipidemia`, `History of CVD`, `Unknown`)
  c <- as.data.frame(c)
  stargazer(c, flip=F, table.layout ="-d#-t-sna-",
            summary.stat = c("n", "mean"),
            title="Patient Reasons for being in PSCV Data",
            out = paste0(leaf, "pscv.tex"))

# (FIG A1) ------
  ens <- read.csv(paste0(gen_data,'ens_2016_analysis.csv'))
  
  ens %<>% filter(!is.na(any_pscv))
  (dens <- ggplot(ens, aes(pa_sys, fill = factor(any_pscv))) +  geom_vline(xintercept=140, color = 'black') +
      geom_density(alpha = 0.4) + scale_x_continuous(breaks = seq(80, 240, by = 20)) + 
      labs(title = "") + xlab("Systolic blood pressure") + ylab("Density") +
      scale_fill_manual(values = c("grey90", "darkblue")) + 
      labs(fill = "PSCV eligible patient")  + theme_bw() + theme(legend.position="bottom"))
  ggsave(dens, file = paste0(leaf,"ens_density_bp.pdf"), width = 7, height = 5) 
  
# (FIG A2) ------------------------
    
  # national health survey (ENS) diastolic bp
  (ens_dia <- ggplot(data=ens, aes(pa_dia)) + geom_vline(xintercept=90, color = 'black', linetype = 'dashed') +
      geom_histogram(binwidth = 1, col="white", fill = "black", alpha=1) + theme_light() + 
      labs(x="Diastolic Blood Pressure", y="N Individuals") + scale_y_continuous(labels = comma) +
      scale_x_continuous(breaks=c(50, 60, 70, 80, 90, 100, 110, 120, 130)) + xlim(50,130) + 
      theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15)))
  
  # electronic health records (ENS) diastolic bp
  (ehr_dia <- ggplot(data=hd, aes(pa_dia)) + geom_vline(xintercept=90, color = 'black', linetype = 'dashed') +
      geom_histogram(binwidth = 1, col="white", fill = "black", alpha=1) + theme_light() + 
      labs(x="Diastolic Blood Pressure", y="N Individuals") + scale_y_continuous(labels = comma) +
      scale_x_continuous(breaks=c(50, 60, 70, 80, 90, 100, 110, 120, 130)) + xlim(50,130) + 
      theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15)))
  
  dia_both <- ggpubr::ggarrange(ens_dia, ehr_dia, ncol = 2, nrow = 1)
  ggsave(dia_both, file = paste0(leaf,"healthsurvey_ehr_dia.pdf"), width = 8, height = 3.5)
  
  
# (FIG A3) ---------------------
  hd$bunchm <- ifelse(hd$sig_neg_bunch_mag>=0, 1, 0)
  hd$bunching_median <- as.factor(hd$bunchm)
  hd$low_rounding <- as.factor(hd$low_rounding10)
  
  (sys <- ggplot(data=hd[hd$pa_sys>=90 & hd$pa_sys<=200,], aes(pa_sys)) +
      geom_vline(xintercept=140, color = 'forestgreen', linetype = 'dashed') +
      geom_histogram(binwidth = 1, col="white", fill = "black", alpha=1) +
      ggtitle("Systolic Blood Pressure - Above and Below Median") +
      labs(x="Systolic Blood Pressure (mmHg)", y="N Visits") + scale_y_continuous(labels = comma) +
      scale_x_continuous(breaks=c(80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210))  + 
      theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15)) + theme_minimal())
  
  (sysq <- sys + facet_wrap(bunching_median ~ low_rounding, ncol=2, labeller=label_both, scales='free'))
  ggsave(sysq, file = paste0(leaf,"bunching_median_hist.pdf"), width = 12, height = 7) 
  
  
# (FIG A4) ----------
 
  hd$bunchm <- ifelse(hd$sig_neg_bunch_mag>=0, 1, 0)
  hd$bunching_median <- as.factor(hd$bunchm)
  hd$low_rounding <- as.factor(hd$low_rounding10)
  
  (sys <- ggplot(data=hd[hd$peso>30 & hd$peso<=130 & !is.na(hd$peso),], aes(peso)) +
      geom_histogram(binwidth = 2, col="white", fill = "black", alpha=1) + 
      labs(x="Weight (kg)", y="N Visits") + scale_y_continuous(labels = comma) + 
      theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15)) + theme_minimal())
  
  (sysq <- sys + facet_wrap(bunching_median ~ low_rounding, ncol=2, labeller=label_both, scales='free'))
  ggsave(sysq, file = paste0(leaf,"weight_bunching_median_hist.pdf"), width = 8, height = 6) 
  
  (sys <- ggplot(data=hd[hd$ldl_mgdl>0 & hd$ldl_mgdl<=260 & !is.na(hd$ldl_mgdl),], aes(ldl_mgdl)) +
      geom_histogram(binwidth = 2, col="white", fill = "black", alpha=1) + 
      labs(x="LDL Cholesterol (mg/dL)", y="N Visits") + scale_y_continuous(labels = comma) + 
      theme(axis.text.x = element_text(color = "black", size = 10)) + theme(plot.title = element_text(size=15)) + theme_minimal())
  
  (sysq <- sys + facet_wrap(bunching_median ~ low_rounding, ncol=2, labeller=label_both, scales='free'))
  ggsave(sysq, file = paste0(leaf,"ldl_bunching_median_hist.pdf"), width = 8, height = 6) 
  
  
# (FIG A5) --------
  
  hd$sig_neg_bunch <- as.factor(hd$sig_neg_bunch)
  hd$pa_sys_10 <- round(hd$pa_sys, digits=-1)
  hd %<>% dplyr::mutate(pa_sys_10 = ifelse(pa_sys_10>200, 200, pa_sys_10))

  outcomes <- c('diag_htn10', 'med_htn_ed', 'cv_stroke_90_h', 
                'cv_stroke_180_h', 'cv_stroke_365_h',
                'cv_acs_90_h', 'cv_acs_180_h', 'cv_acs_365_h',
                'cv_chf_90_h',  'cv_chf_180_h', 'cv_chf_365_h',
                'male', 'bmi_sobese', 'over55', 'over65')
  
  results_list <- list()
  plots_list <- list()
  
  for (outcome in outcomes) {
    result <- data.frame(systolic_bp = numeric(),
                         outcome_value = numeric(),
                         bunching_clinic = factor(),
                         n = numeric(),
                         lower = numeric(),
                         upper = numeric())
    
    for (i in seq(90, 200, 10)) {
      m1 <- felm(as.formula(paste(outcome, "~ 1 | 0 | 0 | cod_est")), 
                 data = hd[hd$pa_sys_10 == i & hd$sig_neg_bunch == 0, ])
      cm1 <- confint(m1)
      result <- rbind(result, data.frame(
        systolic_bp = i,
        outcome_value = m1$coefficients[1, 1],
        bunching_clinic = 0,
        n = nrow(hd[hd$pa_sys_10 == i & hd$sig_neg_bunch == 0, ]),
        lower = cm1[1, 1],
        upper = cm1[1, 2]
      ))
    }
    
    result2 <- data.frame(systolic_bp = numeric(),
                          outcome_value = numeric(),
                          bunching_clinic = factor(),
                          n = numeric(),
                          lower = numeric(),
                          upper = numeric())
    
    for (i in seq(90, 200, 10)) {
      m1 <- felm(as.formula(paste(outcome, "~ 1 | 0 | 0 | cod_est")), 
                 data = hd[hd$pa_sys_10 == i & hd$sig_neg_bunch == 1, ])
      cm1 <- confint(m1)
      result2 <- rbind(result2, data.frame(
        systolic_bp = i,
        outcome_value = m1$coefficients[1, 1],
        bunching_clinic = 1,
        n = nrow(hd[hd$pa_sys_10 == i & hd$sig_neg_bunch == 1, ]),
        lower = cm1[1, 1],
        upper = cm1[1, 2]
      ))
    }
    
    result_final <- rbind(result, result2)
    result_final$bunching_clinic <- as.factor(result_final$bunching_clinic)
    
    result_final[result_final$systolic_bp==130,]$outcome_value <- NA
    result_final[result_final$systolic_bp==130,]$lower <- NA
    result_final[result_final$systolic_bp==130,]$upper <- NA
    result_final[result_final$systolic_bp==140,]$outcome_value <- NA
    result_final[result_final$systolic_bp==140,]$lower <- NA
    result_final[result_final$systolic_bp==140,]$upper <- NA
    result_final[result_final$systolic_bp==150,]$outcome_value <- NA
    result_final[result_final$systolic_bp==150,]$lower <- NA
    result_final[result_final$systolic_bp==150,]$upper <- NA
    
    results_list[[outcome]] <- result_final
  
    reg_plot <- ggplot(result_final, aes(x = systolic_bp, y = outcome_value, 
                                         color = bunching_clinic, group = bunching_clinic)) +
      geom_line() +
      geom_vline(xintercept = 140, color = 'grey') +
      geom_ribbon(aes(ymin = lower, ymax = upper, fill = bunching_clinic), alpha=0.2, colour = NA, show.legend = F) + 
      scale_fill_manual(values = c("grey37", "darkgreen")) +
      geom_point(size = 2, position = position_dodge2(width = 3)) +
      scale_color_manual(values = c("darkgrey", "darkgreen")) +
      scale_x_continuous(breaks = seq(90, 200, by = 10)) +
      labs(x = "Systolic blood pressure", 
           y = "Y",
           color = "High Discretion Clinic") +
      theme_bw() +
      theme(legend.position = c(0.52, 0.94), 
            legend.direction = "horizontal", 
            legend.background = element_rect(fill = NA), 
            legend.key = element_rect(fill = NA, color = NA))
    
    plots_list[[outcome]] <- reg_plot
    ggsave(filename = paste0(leaf, "excluded_reg_", outcome, "_pa_sys_10.pdf"), 
           plot = reg_plot, width = 4, height = 3)
  }

# estimate shrinkage ---------------
  hd %<>% dplyr::group_by(cod_est) %<>% dplyr::mutate(rows_clinic = row_number()) %<>% ungroup()
  bd <- hd
  bd %<>% dplyr::filter(rows_clinic==1)
  bd %<>% dplyr::select(cod_est, b_10, b_sd_10, b_05, b_sd_05, b_20, b_sd_20)

  ash_fit <- ash(betahat = bd$b_10, sebetahat = bd$b_sd_10) # allow program to select prior distribution (it selects method = "fdr")
  bd <- bd %>% mutate(b_10_shrinkage = ash_fit$result$PosteriorMean)  
  
  df_comparison <- bd %>% 
    select(cod_est, original = b_10, shrinkage = b_10_shrinkage)
  df_comparison$diff <- df_comparison$original - df_comparison$shrinkage
  shrinkage_mean <- mean(df_comparison$shrinkage, na.rm = TRUE)

  df_comparison$inv_shrinkage <- df_comparison$shrinkage * -1
  df_comparison$norm_shrink <- (df_comparison$inv_shrinkage - mean(df_comparison$inv_shrinkage, na.rm=T))/sd(df_comparison$inv_shrinkage, na.rm = T)
  col.names.remove <- c("original", "shrinkage", "norm_shrink", "inv_shrinkage")
  hd <- hd[,!(colnames(hd) %in% col.names.remove)]
  
  hd <- dplyr::left_join(hd, df_comparison, by="cod_est")
  
# (FIG A6) -------------------------
  est <- felm(diag_htn10 ~ sig_neg_bunch_mag*under140 | year + quarter    | 0 | cod_est, data=hd)
  coefs = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs$depvar = 'diag_htn10'
  coefs$indepvar = 'sig_neg_bunch_mag'
  
  # append:
  est <- felm(diag_htn10 ~ neg_bunch_mag*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'diag_htn10'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(diag_htn10 ~ norm_bunch*under140 | year + quarter  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'diag_htn10'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(diag_htn10 ~ sig_neg_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'diag_htn10'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(diag_htn10 ~ norm_shrink*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'diag_htn10'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(med_htn_ed ~ sig_neg_bunch_mag*under140 | year + quarter  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'med_htn_ed'
  coefs1$indepvar = 'sig_neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(med_htn_ed ~ neg_bunch_mag*under140 | year + quarter | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'med_htn_ed'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(med_htn_ed ~ norm_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'med_htn_ed'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(med_htn_ed ~ sig_neg_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'med_htn_ed'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(med_htn_ed ~ norm_shrink*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'med_htn_ed'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  coefs$vars <- paste(coefs$depvar, coefs$indepvar)
  coefs$indepvar <- as.factor(coefs$indepvar)    
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="neg_bunch_mag", "Mag. bunching, negative bunching clinics", "")
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="sig_neg_bunch_mag", "(Preferred) Magnitude of bunching, if p<0.05", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="sig_neg_bunch", "Indicator for bunching with p<0.05", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="norm_bunch", "Magnitude of bunching, all clinics", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="norm_shrink", "Magnitude of bunching, all clinics\n(FDR adjusted)", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- as.factor(coefs$`Independent Variable`)
  
  
  coefs$vars[coefs$vars=="diag_htn10 sig_neg_bunch_mag"] <- "Hypertension\nDiagnosis"
  coefs$vars[coefs$vars=="diag_htn10 neg_bunch_mag"] <- "Hypertension\nDiagnosis"
  coefs$vars[coefs$vars=="diag_htn10 norm_bunch"] <- "Hypertension\nDiagnosis"
  coefs$vars[coefs$vars=="diag_htn10 sig_neg_bunch"] <- "Hypertension\nDiagnosis"
  coefs$vars[coefs$vars=="diag_htn10 norm_shrink"] <- "Hypertension\nDiagnosis"
  coefs$vars[coefs$vars=="med_htn_ed sig_neg_bunch_mag"] <- "Hypertension\nPrescription"
  coefs$vars[coefs$vars=="med_htn_ed neg_bunch_mag"] <- "Hypertension\nPrescription"
  coefs$vars[coefs$vars=="med_htn_ed norm_bunch"] <- "Hypertension\nPrescription"
  coefs$vars[coefs$vars=="med_htn_ed sig_neg_bunch"] <- "Hypertension\nPrescription"
  coefs$vars[coefs$vars=="med_htn_ed norm_shrink"] <- "Hypertension\nPrescription"
  
  
  diag <- ggplot(coefs, aes(x=vars, y=Estimate, color=`Independent Variable`, 
                            shape = `Independent Variable`)) + 
    geom_hline(yintercept=0, lty=1,  colour="grey70")  +
    geom_errorbar(aes(ymin=Estimate - 1.96*`Cluster s.e.`, ymax=Estimate + 1.96*`Cluster s.e.`), 
                  position = position_dodge(width=0.5), width=0) + 
    geom_point(size=3, position = position_dodge2(width=0.5)) +  
    scale_color_manual(values = c("black", "royalblue4", "royalblue2", "steelblue2", "lightskyblue3")) + 
    scale_shape_manual(values = c(19, 15, 17, 18, 8)) + 
    xlab("") + ylab("Point estimate with 95% CI") + theme_light() 
  
  (diag1 <- diag + theme(legend.position = "bottom") + 
      guides(color=guide_legend(nrow=3, byrow=F, title.position="top")) + 
      theme(axis.text.x = element_text(face="bold", color="black", size=12),
            axis.text.y = element_text(color="black", size=12)))
  ggsave(diag1, file = paste0(leaf,"results_comparison_adherence_horiz.pdf"), width = 6.5, height = 4.5)
  
  
  
# (FIG A7) -------------------------

  hd$cv_stroke_180_h <- hd$cv_stroke_180*100
  hd$cv_acs_180_h <- hd$cv_acs_180 *100
  hd$cv_chf_180_h <- hd$cv_chf_180 *100
  
  est <- felm(cv_stroke_180_h ~ sig_neg_bunch_mag*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs$depvar = 'cv_stroke_180_h'
  coefs$indepvar = 'sig_neg_bunch_mag'
  
  est <- felm(cv_stroke_180_h ~ neg_bunch_mag*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_stroke_180_h'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_stroke_180_h ~ norm_bunch*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_stroke_180_h'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_stroke_180_h ~ sig_neg_bunch*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_stroke_180_h'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_stroke_180_h ~ norm_shrink*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_stroke_180_h'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_acs_180_h ~ sig_neg_bunch_mag*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_acs_180_h'
  coefs1$indepvar = 'sig_neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_acs_180_h ~ neg_bunch_mag*under140 | year + quarter  + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_acs_180_h'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_acs_180_h ~ norm_bunch*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_acs_180_h'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_acs_180_h ~ sig_neg_bunch*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_acs_180_h'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_acs_180_h ~ norm_shrink*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_acs_180_h'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_chf_180_h ~ sig_neg_bunch_mag*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_chf_180_h'
  coefs1$indepvar = 'sig_neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_chf_180_h ~ neg_bunch_mag*under140 | year + quarter   + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_chf_180_h'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_chf_180_h ~ norm_bunch*under140 | year + quarter   + male + age_round | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_chf_180_h'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_chf_180_h ~ sig_neg_bunch*under140 | year + quarter  + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_chf_180_h'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(cv_chf_180_h ~ norm_shrink*under140 | year + quarter  + male + age_round  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'cv_chf_180_h'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  coefs$vars <- paste(coefs$depvar, coefs$indepvar)
  coefs$indepvar <- as.factor(coefs$indepvar)    
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="neg_bunch_mag", "Mag. bunching, negative bunching clinics", "")
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="sig_neg_bunch_mag", "(Preferred) Magnitude of bunching, if p<0.05", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="sig_neg_bunch", "Indicator for bunching with p<0.05", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="norm_bunch", "Magnitude of bunching, all clinics", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="norm_shrink", "Magnitude of bunching, all clinics\n(FDR adjusted)", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- as.factor(coefs$`Independent Variable`)
  
  
  coefs$vars[coefs$vars=="cv_stroke_180_h sig_neg_bunch_mag"] <- "Stroke"
  coefs$vars[coefs$vars=="cv_stroke_180_h neg_bunch_mag"] <- "Stroke"
  coefs$vars[coefs$vars=="cv_stroke_180_h norm_bunch"] <- "Stroke"
  coefs$vars[coefs$vars=="cv_stroke_180_h sig_neg_bunch"] <- "Stroke"
  coefs$vars[coefs$vars=="cv_stroke_180_h norm_shrink"] <- "Stroke"
  coefs$vars[coefs$vars=="cv_acs_180_h sig_neg_bunch_mag"] <- "Heart attack"
  coefs$vars[coefs$vars=="cv_acs_180_h neg_bunch_mag"] <- "Heart attack"
  coefs$vars[coefs$vars=="cv_acs_180_h norm_bunch"] <- "Heart attack"
  coefs$vars[coefs$vars=="cv_acs_180_h sig_neg_bunch"] <- "Heart attack"
  coefs$vars[coefs$vars=="cv_acs_180_h norm_shrink"] <- "Heart attack"
  coefs$vars[coefs$vars=="cv_chf_180_h sig_neg_bunch_mag"] <- "Heart failure"
  coefs$vars[coefs$vars=="cv_chf_180_h neg_bunch_mag"] <- "Heart failure"
  coefs$vars[coefs$vars=="cv_chf_180_h norm_bunch"] <- "Heart failure"
  coefs$vars[coefs$vars=="cv_chf_180_h sig_neg_bunch"] <- "Heart failure"
  coefs$vars[coefs$vars=="cv_chf_180_h norm_shrink"] <- "Heart failure"
  
  
  diag <- ggplot(coefs, aes(x=vars, y=Estimate, color=`Independent Variable`, 
                            shape = `Independent Variable`)) + 
    geom_hline(yintercept=0, lty=1,  colour="grey70")  +
    geom_errorbar(aes(ymin=Estimate - 1.96*`Cluster s.e.`, ymax=Estimate + 1.96*`Cluster s.e.`), 
                  position = position_dodge(width=0.5), width=0) + 
    geom_point(size=3, position = position_dodge2(width=0.5)) +  
    scale_color_manual(values = c("black", "royalblue4", "royalblue2", "steelblue2", "lightskyblue3")) + 
    scale_shape_manual(values = c(19, 15, 17, 18, 8)) + 
    xlab("") + ylab("Point estimate with 95% CI") + theme_light() 
  
  (diag1 <- diag + theme(legend.position = "bottom") + 
      guides(color=guide_legend(nrow=3, byrow=F, title.position="top")) + 
      theme(axis.text.x = element_text(face="bold", color="black", size=12),
            axis.text.y = element_text(color="black", size=12)))
  ggsave(diag1, file = paste0(leaf,"results_comparison_cv_horiz.pdf"), width = 6.5, height = 4.5)
  
  
  
  
  
# (FIG A8) -------------------------
  est <- felm(male ~ sig_neg_bunch_mag*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs$depvar = 'male'
  coefs$indepvar = 'sig_neg_bunch_mag'
  
  est <- felm(male ~ neg_bunch_mag*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'male'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(male ~ norm_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'male'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(male ~ sig_neg_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'male'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(male ~ norm_shrink*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'male'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(bmi_sobese ~ sig_neg_bunch_mag*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'bmi_sobese'
  coefs1$indepvar = 'sig_neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(bmi_sobese ~ neg_bunch_mag*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'bmi_sobese'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(bmi_sobese ~ norm_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'bmi_sobese'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(bmi_sobese ~ sig_neg_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'bmi_sobese'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(bmi_sobese ~ norm_shrink*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'bmi_sobese'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over55 ~ sig_neg_bunch_mag*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over55'
  coefs1$indepvar = 'sig_neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over55 ~ neg_bunch_mag*under140 | year + quarter  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over55'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over55 ~ norm_bunch*under140 | year + quarter  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over55'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over55 ~ sig_neg_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over55'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over55 ~ norm_shrink*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over55'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over65 ~ sig_neg_bunch_mag*under140 | year + quarter  | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over65'
  coefs1$indepvar = 'sig_neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over65 ~ neg_bunch_mag*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over65'
  coefs1$indepvar = 'neg_bunch_mag'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over65 ~ norm_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over65'
  coefs1$indepvar = 'norm_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over65 ~ sig_neg_bunch*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over65'
  coefs1$indepvar = 'sig_neg_bunch'
  coefs <- rbind(coefs, coefs1)
  
  est <- felm(over65 ~ norm_shrink*under140 | year + quarter   | 0 | cod_est, data=hd)
  coefs1 = (as.data.frame(summary(est)$coefficients))[3, 1:2]
  coefs1$depvar = 'over65'
  coefs1$indepvar = 'norm_shrink'
  coefs <- rbind(coefs, coefs1)
  
  coefs$vars <- paste(coefs$depvar, coefs$indepvar)
  coefs$indepvar <- as.factor(coefs$indepvar)    
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="neg_bunch_mag", "Mag. bunching, negative bunching clinics", "")
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="sig_neg_bunch_mag", "(Preferred) Magnitude of bunching, if p<0.05", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="sig_neg_bunch", "Indicator for bunching with p<0.05", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="norm_bunch", "Magnitude of bunching, all clinics", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- ifelse(coefs$indepvar=="norm_shrink", "Magnitude of bunching, all clinics\n(FDR adjusted)", coefs$`Independent Variable`)
  coefs$`Independent Variable` <- as.factor(coefs$`Independent Variable`)
  
  
  coefs$vars[coefs$vars=="male sig_neg_bunch_mag"] <- "Male"
  coefs$vars[coefs$vars=="male neg_bunch_mag"] <- "Male"
  coefs$vars[coefs$vars=="male norm_bunch"] <- "Male"
  coefs$vars[coefs$vars=="male sig_neg_bunch"] <- "Male"
  coefs$vars[coefs$vars=="male norm_shrink"] <- "Male"
  coefs$vars[coefs$vars=="bmi_sobese sig_neg_bunch_mag"] <- "Severe\nObesity"
  coefs$vars[coefs$vars=="bmi_sobese neg_bunch_mag"] <- "Severe\nObesity"
  coefs$vars[coefs$vars=="bmi_sobese norm_bunch"] <- "Severe\nObesity"
  coefs$vars[coefs$vars=="bmi_sobese sig_neg_bunch"] <- "Severe\nObesity"
  coefs$vars[coefs$vars=="bmi_sobese norm_shrink"] <- "Severe\nObesity"
  coefs$vars[coefs$vars=="over55 sig_neg_bunch_mag"] <- "Age 55+"
  coefs$vars[coefs$vars=="over55 neg_bunch_mag"] <- "Age 55+"
  coefs$vars[coefs$vars=="over55 norm_bunch"] <- "Age 55+"
  coefs$vars[coefs$vars=="over55 sig_neg_bunch"] <- "Age 55+"
  coefs$vars[coefs$vars=="over55 norm_shrink"] <- "Age 55+"
  coefs$vars[coefs$vars=="over65 sig_neg_bunch_mag"] <- "Age 65+"
  coefs$vars[coefs$vars=="over65 neg_bunch_mag"] <- "Age 65+"
  coefs$vars[coefs$vars=="over65 norm_bunch"] <- "Age 65+"
  coefs$vars[coefs$vars=="over65 sig_neg_bunch"] <- "Age 65+"
  coefs$vars[coefs$vars=="over65 norm_shrink"] <- "Age 65+"
  
  
  diag <- ggplot(coefs, aes(x=vars, y=Estimate, color=`Independent Variable`, 
                            shape = `Independent Variable`)) + 
    geom_hline(yintercept=0, lty=1,  colour="grey70")  +
    geom_errorbar(aes(ymin=Estimate - 1.96*`Cluster s.e.`, ymax=Estimate + 1.96*`Cluster s.e.`), 
                  position = position_dodge(width=0.5), width=0) + 
    geom_point(size=3, position = position_dodge2(width=0.5)) +  
    scale_color_manual(values = c("black", "royalblue4", "royalblue2", "steelblue2", "lightskyblue3")) + 
    scale_shape_manual(values = c(19, 15, 17, 18, 8)) + 
    xlab("") + ylab("Point estimate with 95% CI") + theme_light() 
  
  (diag1 <- diag + theme(legend.position = "bottom") + 
      guides(color=guide_legend(nrow=3, byrow=F, title.position="top")) + 
      theme(axis.text.x = element_text(face="bold", color="black", size=12),
            axis.text.y = element_text(color="black", size=12)))
  ggsave(diag1, file = paste0(leaf,"results_comparison_heuristics_horiz.pdf"), width = 6.5, height = 4.5)
  

# (TAB A10, A11, A12) --------
  mean1 <- function(x) {
    meantest <- round(mean(hd[[paste0(x)]], na.rm=T),3)
  }
  clinics1 <- function(x) {
    if (any(!is.na(hd[[paste0(x)]]))) {
      clinictest <- length(unique(hd[["cod_est"]]))
      return(clinictest) 
    } else {
      return(NA) 
    }
  }
  
  
  ## Tab A10 - Diag and Prescription 
  outcomes <- c('diag_htn10', 'med_htn_ed')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ norm_shrink*under140   | year + quarter   | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Hypertension Diagnosis and Prescription (FDR adjusted)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_guidelines_fdr",
            dep.var.labels= c('\\shortstack{Hypertension \\\\ Diagnosis}', '\\shortstack{Hypertension \\\\ Prescription}'),
            omit.table.layout = "n", font.size = "small", 
            order=c("norm_shrink:under140", "norm_shrink", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_guidelines_fdr.tex"))
  

  ## Tab A11 - Hosp per 100
  hd$cv_stroke_90_h <- hd$cv_stroke_90*100
  hd$cv_acs_90_h <- hd$cv_acs_90 *100
  hd$cv_chf_90_h <- hd$cv_chf_90 *100
  hd$cv_stroke_180_h <- hd$cv_stroke_180*100
  hd$cv_acs_180_h <- hd$cv_acs_180 *100
  hd$cv_chf_180_h <- hd$cv_chf_180 *100
  hd$cv_stroke_365_h <- hd$cv_stroke_365*100
  hd$cv_acs_365_h <- hd$cv_acs_365 *100
  hd$cv_chf_365_h <- hd$cv_chf_365 *100
  
  outcomes <- c( 'cv_stroke_90_h', 'cv_stroke_180_h', 'cv_stroke_365_h',
                 'cv_acs_90_h', 'cv_acs_180_h', 'cv_acs_365_h',
                 'cv_chf_90_h',  'cv_chf_180_h', 'cv_chf_365_h')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ norm_shrink*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Cardiovascular Hospitalization 3, 6, and 12 Months After Primary Care Visit (FDR adjusted)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_cv_fdr",
            dep.var.labels= c('$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.', 
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.',
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.'),
            order=c("norm_shrink:under140", "norm_shrink", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            omit.table.layout = "n", font.size = "small",
            out = paste0(leaf, "results_cv_fdr.tex"))
  
  ## Tab A12 - Heuristics
  outcomes <- c('male', 'bmi_sobese', 'over55', 'over65')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ norm_shrink*under140   | year + quarter    | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Selection by Patient Characteristics Representative of High Cardiovascular Risk (FDR adjusted)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-",
            dep.var.labels= c('Male',  '\\shortstack{Severe\\\\Obesity}', 'Age 55+', 'Age 65+'),
            omit.table.layout = "n", font.size = "small", label = "tab:results_heuristics_fdr",
            order=c("norm_shrink:under140", "norm_shrink", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_heuristics_fdr.tex"))
  
# (FIG A9) -------
  hd$cv_stroke_acs_365 <- ifelse(hd$cv_stroke_365==1 | hd$cv_acs_365 ==1, 1, 0)
  hd$cv_stroke_acs_365_h <- hd$cv_stroke_acs_365 * 100
  hd$sig_neg_bunch <- as.factor(hd$sig_neg_bunch)
  hd$pa_sys_10 <- round(hd$pa_sys, digits=-1)
  hd %<>% dplyr::mutate(pa_sys_10 = ifelse(pa_sys_10>200, 200, pa_sys_10))
  
  result <- NA
  result <- data.frame(matrix(nrow = 3, ncol = 6))
  colnames(result) <- c("systolic_bp", "cv_stroke_acs_365_h", "bunching_clinic", "n", "lower", "upper")
  for (i in c(seq(80, 200, 10))){
    m1 <- felm(cv_stroke_acs_365_h ~ 1 | 0 | 0 | cod_est, data = hd[hd$pa_sys_10==i & hd$sig_neg_bunch==0,])
    cm1 <- confint(m1)
    result[i, 1] <- i
    result[i, 2] <- m1$coefficients[1,1]
    result[i, 3] <- 0 
    result[i, 4] <- count(hd[hd$pa_sys_10==i & hd$sig_neg_bunch==0,])
    result[i, 5] <- cm1[1,1]
    result[i, 6] <- cm1[1,2]
  }
  result$bunching_clinic <- as.factor(result$bunching_clinic)
  
  
  result2 <- NA
  result2 <- data.frame(matrix(nrow = 3, ncol = 6))
  colnames(result2) <- c("systolic_bp", "cv_stroke_acs_365_h", "bunching_clinic", "n", "lower", "upper")
  for (i in c(seq(80, 200, 10))){
    m1 <- felm(cv_stroke_acs_365_h ~ 1 | 0 | 0 | cod_est, data = hd[hd$pa_sys_10==i & hd$sig_neg_bunch==1,])
    cm1 <- confint(m1)
    result2[i, 1] <- i
    result2[i, 2] <- m1$coefficients[1,1]
    result2[i, 3] <- 1
    result2[i, 4] <- count(hd[hd$pa_sys_10==i & hd$sig_neg_bunch==1,])
    result2[i, 5] <- cm1[1,1]
    result2[i, 6] <- cm1[1,2]
  }
  result2$bunching_clinic <- as.factor(result2$bunching_clinic)
  
  result0 <- rbind(result, result2)
  result0 <- result0[!is.na(result0$systolic_bp),]
  
  (reg_attcl<- ggplot(result0, aes(x=systolic_bp, y=cv_stroke_acs_365_h, color=bunching_clinic, shape=bunching_clinic)) +
      geom_vline(xintercept=140, color = 'grey', linetype=2) +
      geom_errorbar(data=result0, mapping=aes(x=systolic_bp, ymin=lower,ymax=upper, color=bunching_clinic), position = position_dodge(width=3), width=0) +
      geom_point(size=2, position = position_dodge2(width=3)) + scale_color_manual(values = c("darkgrey", "darkgreen")) + scale_x_continuous(breaks = seq(80, 240, by = 10)) + 
      labs(title = "") + xlab("Systolic blood pressure") + ylab("Hospitalization within 1 year") +
      labs(color = "High Discretion Clinic", shape = "High Discretion Clinic")  +
      theme_bw())
  (reg_attcl<- reg_attcl + theme(legend.position=c(0.52,0.94), legend.direction = "horizontal") + 
      theme(legend.background = element_rect(fill = NA), legend.key = element_rect(fill = NA, color = NA)))
  ggsave(reg_attcl, file = paste0(leaf,"did_cv_stroke_acs_365_pa_sys_10.pdf"), width = 6, height = 4) 
  
  
  # by high vs low bunching -- bin = 20
  hd$pa_sys_20 <- NA
  hd %<>% dplyr::mutate(pa_sys_20 = ifelse(pa_sys<=99, 90, pa_sys_20))
  hd %<>% dplyr::mutate(pa_sys_20 = ifelse(pa_sys>=100 & pa_sys<=119, 110, pa_sys_20))
  hd %<>% dplyr::mutate(pa_sys_20 = ifelse(pa_sys>=120 & pa_sys<=139, 130, pa_sys_20))
  hd %<>% dplyr::mutate(pa_sys_20 = ifelse(pa_sys>=140 & pa_sys<=159, 150, pa_sys_20))
  hd %<>% dplyr::mutate(pa_sys_20 = ifelse(pa_sys>=160 & pa_sys<=179, 170, pa_sys_20))
  hd %<>% dplyr::mutate(pa_sys_20 = ifelse(pa_sys>=180, 190, pa_sys_20))
  
  
  result <- NA
  result <- data.frame(matrix(nrow = 3, ncol = 6))
  colnames(result) <- c("systolic_bp", "cv_stroke_acs_365_h", "bunching_clinic", "n", "lower", "upper")
  for (i in c(seq(90, 190, 20))){
    m1 <- felm(cv_stroke_acs_365_h ~ 1 | 0 | 0 | cod_est, data = hd[hd$pa_sys_20==i & hd$sig_neg_bunch==0,]) 
    cm1 <- confint(m1)
    result[i, 1] <- i
    result[i, 2] <- m1$coefficients[1,1]
    result[i, 3] <- 0 
    result[i, 4] <- count(hd[hd$pa_sys_20==i & hd$sig_neg_bunch==0,])
    result[i, 5] <- cm1[1,1]
    result[i, 6] <- cm1[1,2]
  }
  result$bunching_clinic <- as.factor(result$bunching_clinic)
  
  
  
  result2 <- NA
  result2 <- data.frame(matrix(nrow = 3, ncol = 6))
  colnames(result2) <- c("systolic_bp", "cv_stroke_acs_365_h", "bunching_clinic", "n", "lower", "upper")
  for (i in c(seq(90, 190, 20))){
    m1 <- felm(cv_stroke_acs_365_h ~ 1 | 0 | 0 | cod_est, data = hd[hd$pa_sys_20==i & hd$sig_neg_bunch==1,]) 
    cm1 <- confint(m1)
    result2[i, 1] <- i
    result2[i, 2] <- m1$coefficients[1,1]
    result2[i, 3] <- 1
    result2[i, 4] <- count(hd[hd$pa_sys_20==i & hd$sig_neg_bunch==1,])
    result2[i, 5] <- cm1[1,1]
    result2[i, 6] <- cm1[1,2]
  }
  result2$bunching_clinic <- as.factor(result2$bunching_clinic)
  
  result0 <- rbind(result, result2)
  result0 <- result0[!is.na(result0$systolic_bp),]
  
  (reg_attcl<- ggplot(result0, aes(x=systolic_bp, y=cv_stroke_acs_365_h, color=bunching_clinic, shape=bunching_clinic)) +
      geom_vline(xintercept=140, color = 'grey', linetype=2) +
      geom_errorbar(data=result0, mapping=aes(x=systolic_bp, ymin=lower,ymax=upper, color=bunching_clinic), position = position_dodge(width=3), width=0) +
      geom_point(size=2, position = position_dodge2(width=3)) + 
      scale_color_manual(values = c("darkgrey", "darkgreen")) + 
      scale_x_continuous(breaks = seq(80, 240, by = 10)) + 
      labs(title = "") + xlab("Systolic blood pressure") + ylab("Hospitalization within 1 year") +
      labs(color = "High Discretion Clinic", shape = "High Discretion Clinic")  +
      theme_bw())
  (reg_attcl<- reg_attcl + theme(legend.position=c(0.52,0.94), legend.direction = "horizontal") + 
      theme(legend.background = element_rect(fill = NA), legend.key = element_rect(fill = NA, color = NA)))
  ggsave(reg_attcl, file = paste0(leaf,"did_cv_stroke_acs_365_pa_sys_20.pdf"), width = 6, height = 4) 
  
  
  
# (TAB A13, A15, A17) ---------------------
  mean1 <- function(x) {
    meantest <- round(mean(hd[[paste0(x)]], na.rm=T),3)
  }
  clinics1 <- function(x) {
    # Check if the column exists and has non-NA values
    if (any(!is.na(hd[[paste0(x)]]))) {
      # Calculate the number of unique values in the "cod_est" column
      clinictest <- length(unique(hd[["cod_est"]]))
      return(clinictest) # Ensure the result is returned
    } else {
      return(NA) # Return NA if all values are NA or column doesn't exist
    }
  }
  
  
  ## Tab A13 - Diag and Prescription
  outcomes <- c('diag_htn10', 'med_htn_ed')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   | 0 | cod_est"))
    felm(form, data = hd[hd$pa_sys>=90 & hd$pa_sys<=160,], exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Hypertension Diagnosis and Prescription (BP=90 to 160)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_guidelines_90to160",
            dep.var.labels= c('\\shortstack{Hypertension \\\\ Diagnosis}', '\\shortstack{Hypertension \\\\ Prescription}'),
            omit.table.layout = "n", font.size = "footnotesize",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_guidelines_90to160.tex"))
  
  
  ## Tab A15 - CV hospitalizations per 100
  hd$cv_stroke_90_h <- hd$cv_stroke_90*100
  hd$cv_acs_90_h <- hd$cv_acs_90 *100
  hd$cv_chf_90_h <- hd$cv_chf_90 *100
  hd$cv_stroke_180_h <- hd$cv_stroke_180*100
  hd$cv_acs_180_h <- hd$cv_acs_180 *100
  hd$cv_chf_180_h <- hd$cv_chf_180 *100
  hd$cv_stroke_365_h <- hd$cv_stroke_365*100
  hd$cv_acs_365_h <- hd$cv_acs_365 *100
  hd$cv_chf_365_h <- hd$cv_chf_365 *100
  
  outcomes <- c( 'cv_stroke_90_h', 'cv_stroke_180_h', 'cv_stroke_365_h',
                 'cv_acs_90_h', 'cv_acs_180_h', 'cv_acs_365_h',
                 'cv_chf_90_h',  'cv_chf_180_h', 'cv_chf_365_h')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = hd[hd$pa_sys>=90 & hd$pa_sys<=160,], exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Cardiovascular Hospitalization 3, 6, and 12 Months After Primary Care Visit (BP=90 to 160)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_cv_90to160",
            dep.var.labels= c('$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.', 
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.',
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.'),
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            omit.table.layout = "n", font.size = "scriptsize",
            out = paste0(leaf, "results_cv_90to160.tex"))
  

  
  ## Tab A17 Heurisitcs 
  outcomes <- c('male',  'bmi_sobese', 'over55', 'over65')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter    | 0 | cod_est"))
    felm(form, data = hd[hd$pa_sys>=90 & hd$pa_sys<=160,], exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Selection by Patient Characteristics Representative of High Cardiovascular Risk (BP=90 to 160)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-",
            dep.var.labels= c('Male',  '\\shortstack{Severe\\\\Obesity}', 'Age 55+', 'Age 65+'),
            omit.table.layout = "n", font.size = "footnotesize", label = "tab:results_heuristics_90to160",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_heuristics_90to160.tex"))
  
 
  
# (TAB A14, A16, A18) ---------------------
  mean1 <- function(x) {
    meantest <- round(mean(hd[[paste0(x)]], na.rm=T),3)
  }
  clinics1 <- function(x) {
    # Check if the column exists and has non-NA values
    if (any(!is.na(hd[[paste0(x)]]))) {
      # Calculate the number of unique values in the "cod_est" column
      clinictest <- length(unique(hd[["cod_est"]]))
      return(clinictest) # Ensure the result is returned
    } else {
      return(NA) # Return NA if all values are NA or column doesn't exist
    }
  }
  
  ## Tab A14 - Diag and Prescription
  outcomes <- c('diag_htn10', 'med_htn_ed')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   | 0 | cod_est"))
    felm(form, data = hd[hd$pa_sys>=120 & hd$pa_sys<=160,], exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Hypertension Diagnosis and Prescription (BP=120 to 160)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_guidelines_120to160",
            dep.var.labels= c('\\shortstack{Hypertension \\\\ Diagnosis}', '\\shortstack{Hypertension \\\\ Prescription}'),
            omit.table.layout = "n", font.size = "footnotesize",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_guidelines_120to160.tex"))
  
  
  
  ## Tab A16 - CV hospitalizations per 100
  hd$cv_stroke_90_h <- hd$cv_stroke_90*100
  hd$cv_acs_90_h <- hd$cv_acs_90 *100
  hd$cv_chf_90_h <- hd$cv_chf_90 *100
  hd$cv_stroke_180_h <- hd$cv_stroke_180*100
  hd$cv_acs_180_h <- hd$cv_acs_180 *100
  hd$cv_chf_180_h <- hd$cv_chf_180 *100
  hd$cv_stroke_365_h <- hd$cv_stroke_365*100
  hd$cv_acs_365_h <- hd$cv_acs_365 *100
  hd$cv_chf_365_h <- hd$cv_chf_365 *100
  
  outcomes <- c( 'cv_stroke_90_h', 'cv_stroke_180_h', 'cv_stroke_365_h',
                 'cv_acs_90_h', 'cv_acs_180_h', 'cv_acs_365_h',
                 'cv_chf_90_h',  'cv_chf_180_h', 'cv_chf_365_h')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter  + male + age_round  | 0 | cod_est"))
    felm(form, data = hd[hd$pa_sys>=120 & hd$pa_sys<=160,], exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Discretion on Cardiovascular Hospitalization 3, 6, and 12 Months After Primary Care Visit (BP=120 to 160)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_cv_120to160",
            dep.var.labels= c('$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.', 
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.',
                              '$\\leq$3 mo.', '$\\leq$6 mo.', '$\\leq$12 mo.'),
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            omit.table.layout = "n", font.size = "scriptsize",
            out = paste0(leaf, "results_cv_120to160.tex"))
  
  
  ## Tab A18 heuristics
  outcomes <- c('male', 'bmi_sobese', 'over55', 'over65')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter    | 0 | cod_est"))
    felm(form, data = hd[hd$pa_sys>=120 & hd$pa_sys<=160,], exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Selection by Patient Characteristics Representative of High Cardiovascular Risk (BP=120 to 160)", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-",
            dep.var.labels= c('Male',  '\\shortstack{Severe\\\\Obesity}', 'Age 55+', 'Age 65+'),
            omit.table.layout = "n", font.size = "footnotesize", label = "tab:results_heuristics_120to160",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_heuristics_120to160.tex"))
  
  
  
# (TAB A23) ------   
  outcomes <- c('age_50s', 'age_60s', 'age_70s')
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, "~ sig_neg_bunch_mag*under140   | year + quarter   | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Patient Characteristics - Age", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:results_age",
            omit.table.layout = "n", font.size = "small",
            dep.var.labels= c('Age 50+', 'Age 60+', 'Age 70+'),
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_age.tex"))
  
  
# (TAB A24) ------  
  hd$ant_fliar_cardiopatia_ind <- ifelse(!is.na(hd$ant_fliar_cardiopatia),1, 0)
  hd$ant_lipidos_genetico_ind <- ifelse(!is.na(hd$ant_lipidos_genetico),1, 0)
  hd$ant_pers_enf_cv_ind <- ifelse(!is.na(hd$ant_pers_enf_cv),1, 0)
  
  hd$any_hist_q <- ifelse(hd$ant_fliar_cardiopatia_ind==1|hd$ant_lipidos_genetico_ind==1|hd$ant_pers_enf_cv_ind==1, 1, 0)
  hd$any_hist_event <- ifelse(hd$ant_fliar_cardiopatia==1|hd$ant_lipidos_genetico==1|hd$ant_pers_enf_cv==1, 1, 0)
  
  outcomes <- c('any_hist_q',  'ant_fliar_cardiopatia_ind', 'ant_lipidos_genetico_ind', 'ant_pers_enf_cv_ind', 
                'any_hist_event', 'ant_fliar_cardiopatia', 'ant_lipidos_genetico',  'ant_pers_enf_cv')
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter  | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Selection by Patient Characteristics - Health History Questions", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-",
            dep.var.labels= c('Any', 'Heart disease', 'Dyslipidemia',  'CVD', 'Any', 'Heart disease',  'Dyslipidemia', 'CVD'),
            omit.table.layout = "n", font.size = "small", label = "tab:results_history",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Discretion x BP$<$140}", "Discretion","BP$<$140"),
            out = paste0(leaf, "results_history.tex"))
  
  
# (FIG A10) --------------------------   
  
  # framingham questions
  hd %<>% dplyr::mutate(colesterol_total_mgdl_nm = ifelse(!is.na(colesterol_total_mgdl), 1, 0))
  hd %<>% dplyr::mutate(hdl_mgdl_nm = ifelse(!is.na(hdl_mgdl), 1, 0))
  hd %<>% dplyr::mutate(tabaco_nm = ifelse(!is.na(tabaco), 1, 0))
  hd %<>% dplyr::mutate(diag_dm_nm =  ifelse(!is.na(diag_dm), 1, 0))
  hd %<>% dplyr::mutate(bmi_nm =  ifelse(!is.na(bmi), 1, 0))
  
  # history questions
  hd$ant_fliar_cardiopatia_ind <- ifelse(!is.na(hd$ant_fliar_cardiopatia),1, 0)
  hd$ant_lipidos_genetico_ind <- ifelse(!is.na(hd$ant_lipidos_genetico),1, 0)
  hd$ant_pers_enf_cv_ind <- ifelse(!is.na(hd$ant_pers_enf_cv),1, 0)
  
  # CV risk
  hd %<>% dplyr::mutate(riesgo_cvn_nm =  ifelse(!is.na(riesgo_cvn), 1, 0))
  
  est <- felm(sig_neg_bunch_mag ~ colesterol_total_mgdl_nm  | year + quarter | 0 | cod_est, data = hd)
  coefs_p = as.data.frame(summary(est)$coefficients)
  coefs_p <- coefs_p[, 1:2]
  coefs_p$vars = rownames(coefs_p)
  
  est <- felm(sig_neg_bunch_mag ~ hdl_mgdl_nm  |year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ tabaco_nm  | year + quarter   | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ diag_dm_nm  | year + quarter | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ bmi_nm  | year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~  ant_fliar_cardiopatia_ind  | year + quarter   | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~  ant_lipidos_genetico_ind  | year +quarter | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ ant_pers_enf_cv_ind  | year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  est <- felm(sig_neg_bunch_mag ~ riesgo_cvn_nm  | year + quarter  | 0 | cod_est, data = hd)
  coefs_p1 = as.data.frame(summary(est)$coefficients)
  coefs_p1 <- coefs_p1[, 1:2]
  coefs_p1$vars = rownames(coefs_p1)
  
  coefs_p <- rbind(coefs_p, coefs_p1)
  
  coefs_p$vars[coefs_p$vars=="colesterol_total_mgdl_nm"] <-   "Recorded total cholesterol"         
  coefs_p$vars[coefs_p$vars=="hdl_mgdl_nm"] <-  "Recorded HDL cholsterol"
  coefs_p$vars[coefs_p$vars=="tabaco_nm"] <- "Recorded smoking status"  
  coefs_p$vars[coefs_p$vars=="diag_dm_nm"] <- "Recorded type 2 diabetes status"
  coefs_p$vars[coefs_p$vars=="bmi_nm"] <- "Recorded BMI"
  coefs_p$vars[coefs_p$vars=="ant_fliar_cardiopatia_ind"] <- "Recorded history of heart disease"
  coefs_p$vars[coefs_p$vars=="ant_lipidos_genetico_ind"] <- "Recorded history of dyslipidemia"
  coefs_p$vars[coefs_p$vars=="ant_pers_enf_cv_ind"] <- "Recorded history of cardiovascular disease"
  coefs_p$vars[coefs_p$vars=="riesgo_cvn_nm"] <- "Recorded cardiovascular risk level"
  
  
  (bal_patient <- ggplot(coefs_p, aes(Estimate, vars)) + 
      geom_vline(xintercept=0, lty=1,  colour="grey50")  +
      geom_errorbarh(aes(xmin=Estimate - 1.96*`Cluster s.e.`, xmax=Estimate + 1.96*`Cluster s.e.`), 
                     colour="grey40", height=0) +
      geom_point(size=2, shape=15, color="grey40") +   
      scale_x_continuous(breaks = seq(-0.5, 0.5, by = 0.02)) + xlim(c(-0.25, 0.25)) + ylab("") +
      xlab("Estimate with 95% confidence interval") + theme_light() +  theme(text = element_text(size=15), axis.text.y = element_text(size=15)))
  ggsave(bal_patient, file = paste0(leaf,"patient_balance_ttest_existance.pdf"), width = 8, height = 4)
  
  
  
# (TAB A25, A26) ------
    
    nb <- hd[hd$sig_neg_bunch==0,]
    
    #impute:
    nb %<>% dplyr::mutate(age_at_visit = ifelse(is.na(age_at_visit), mean(age_at_visit, na.rm=T), age_at_visit))
    nb %<>% dplyr::mutate(colesterol_total_mgdl = ifelse(is.na(colesterol_total_mgdl), mean(colesterol_total_mgdl, na.rm=T), colesterol_total_mgdl))
    nb %<>% dplyr::mutate(hdl_mgdl = ifelse(is.na(hdl_mgdl), mean(hdl_mgdl, na.rm=T), hdl_mgdl))
    nb %<>% dplyr::mutate(tabaco = ifelse(is.na(tabaco), mean(tabaco, na.rm=T), tabaco))
    
    nb %<>% dplyr::mutate(log_age = log(age_at_visit))
    nb %<>% dplyr::mutate(log_total_chol = log(colesterol_total_mgdl))
    nb %<>% dplyr::mutate(log_hdl_chol = log(hdl_mgdl)) 
    
    nb %<>% dplyr::mutate(cv_stroke_acs_365 = ifelse(cv_stroke_365==1 | cv_acs_365==1, 1, 0)) 
    nb %<>% dplyr::mutate(cv_stroke_acs_180 = ifelse(cv_stroke_180==1 | cv_acs_180==1, 1, 0)) 
    nb %<>% dplyr::mutate(cv_stroke_acs_90 = ifelse(cv_stroke_90==1 | cv_acs_90==1, 1, 0)) 
    
    nb$cv_stroke_acs_365_h <- nb$cv_stroke_acs_365*100
    nb$cv_stroke_365_h <- nb$cv_stroke_365*100
    nb$cv_acs_365_h <- nb$cv_acs_365*100
    nb$cv_stroke_acs_180_h <- nb$cv_stroke_acs_180*100
    nb$cv_stroke_180_h <- nb$cv_stroke_180*100
    nb$cv_acs_180_h <- nb$cv_acs_180*100
    nb$cv_stroke_acs_90_h <- nb$cv_stroke_acs_90*100
    nb$cv_stroke_90_h <- nb$cv_stroke_90*100
    nb$cv_acs_90_h <- nb$cv_acs_90*100
    
    # functions:
    mean1 <- function(x) {
      meantest <- round(mean(nb[[paste0(x)]], na.rm=T),3)
    }
    clinics1 <- function(x) {
      if (any(!is.na(nb[[paste0(x)]]))) {
        clinictest <- length(unique(nb[["cod_est"]]))
        return(clinictest) 
      } else {
        return(NA) 
      }
    }
    

    bb <- hd[hd$sig_neg_bunch==1,]
    
    #impute:
    bb %<>% dplyr::mutate(age_at_visit = ifelse(is.na(age_at_visit), mean(age_at_visit, na.rm=T), age_at_visit))
    bb %<>% dplyr::mutate(colesterol_total_mgdl = ifelse(is.na(colesterol_total_mgdl), mean(colesterol_total_mgdl, na.rm=T), colesterol_total_mgdl))
    bb %<>% dplyr::mutate(hdl_mgdl = ifelse(is.na(hdl_mgdl), mean(hdl_mgdl, na.rm=T), hdl_mgdl))
    bb %<>% dplyr::mutate(tabaco = ifelse(is.na(tabaco), mean(tabaco, na.rm=T), tabaco))
    
    bb %<>% dplyr::mutate(log_age = log(age_at_visit))
    bb %<>% dplyr::mutate(log_total_chol = log(colesterol_total_mgdl))
    bb %<>% dplyr::mutate(log_hdl_chol = log(hdl_mgdl)) 
    
    bb %<>% dplyr::mutate(cv_stroke_acs_365 = ifelse(cv_stroke_365==1 | cv_acs_365==1, 1, 0)) 
    bb %<>% dplyr::mutate(cv_stroke_acs_180 = ifelse(cv_stroke_180==1 | cv_acs_180==1, 1, 0)) 
    bb %<>% dplyr::mutate(cv_stroke_acs_90 = ifelse(cv_stroke_90==1 | cv_acs_90==1, 1, 0)) 
    
    bb$cv_stroke_acs_365_h <- bb$cv_stroke_acs_365*100
    bb$cv_stroke_365_h <- bb$cv_stroke_365*100
    bb$cv_acs_365_h <- bb$cv_acs_365*100
    bb$cv_stroke_acs_180_h <- bb$cv_stroke_acs_180*100
    bb$cv_stroke_180_h <- bb$cv_stroke_180*100
    bb$cv_acs_180_h <- bb$cv_acs_180*100
    bb$cv_stroke_acs_90_h <- bb$cv_stroke_acs_90*100
    bb$cv_stroke_90_h <- bb$cv_stroke_90*100
    bb$cv_acs_90_h <- bb$cv_acs_90*100
    
    mean1_bb <- function(x) {
      meantest <- round(mean(bb[[paste0(x)]], na.rm=T),3)
    }
    clinics1_bb <- function(x) {
      if (any(!is.na(bb[[paste0(x)]]))) {
        clinictest <- length(unique(bb[["cod_est"]]))
        return(clinictest) 
      } else {
        return(NA) 
      }
    }
    
    outcomes <- c( 'cv_stroke_acs_365_h')
    
    meandf <- as.character(lapply(outcomes, mean1))
    clinicdf <- as.character(lapply(outcomes, clinics1))
    
    reglist1 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ pa_sys   | 0 | 0 | cod_est"))
      felm(form, data = nb, exactDOF=TRUE)
    })
    
    reglist2 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ pa_sys + diag_dm10 + male + log_age + log_total_chol + log_hdl_chol + tabaco  | 0 | 0 | cod_est"))
      felm(form, data = nb, exactDOF=TRUE)
    })
    
    reglist3 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ pa_sys  | 0 | 0 | cod_est"))
      felm(form, data = bb, exactDOF=TRUE)
    })
    
    
    meandf_bb <- as.character(lapply(outcomes, mean1_bb))
    clinicdf_bb <- as.character(lapply(outcomes, clinics1_bb))
    
    reglist4 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ pa_sys + diag_dm10 + male + log_age + log_total_chol + log_hdl_chol + tabaco  | 0 | 0 | cod_est"))
      felm(form, data = bb, exactDOF=TRUE)
    })
    
    reglist1[[length(reglist1) + 1]] <- reglist2
    reglist1[[length(reglist1) + 1]] <- reglist3
    reglist1[[length(reglist1) + 1]] <- reglist4
    meandf <- cbind(meandf, meandf, meandf_bb, meandf_bb)
    clinicdf <- cbind(clinicdf, clinicdf, clinicdf_bb, clinicdf_bb)
    
    stargazer(reglist1, title="Predicting Stroke and Heart Attack with Patient Characteristics", keep.stat=c("n", "adj.rsq"),
              add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
              dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:private_strokeacs_365",
              dep.var.labels= c('Stroke or Heart Attack $\\leq$12 Months (per 100)'),
              omit.table.layout = "n", font.size = "small",
              covariate.labels=c("\\textbf{Blood Pressure}", "DM2 diagnosis","Male", "Log Age", "Log Total Chol.", "Log HDL Chol.", "Smoker"),
              out = paste0(leaf, "r2_cv_stroke_acs_365_h.tex"))
    
    
    outcomes <- c( 'cv_stroke_acs_365_h')
    
    
    meandf <- as.character(lapply(outcomes, mean1))
    clinicdf <- as.character(lapply(outcomes, clinics1))
    
    reglist1 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ over140   | 0 | 0 | cod_est"))
      felm(form, data = nb, exactDOF=TRUE)
    })
    
    
    reglist2 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ over140 + diag_dm10 + male + log_age + log_total_chol + log_hdl_chol + tabaco  | 0 | 0 | cod_est"))
      felm(form, data = nb, exactDOF=TRUE)
    })
    
    reglist3 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ over140  | 0 | 0 | cod_est"))
      felm(form, data = bb, exactDOF=TRUE)
    })
    
    
    meandf_bb <- as.character(lapply(outcomes, mean1_bb))
    clinicdf_bb <- as.character(lapply(outcomes, clinics1_bb))
    
    reglist4 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ over140 + diag_dm10 + male + log_age + log_total_chol + log_hdl_chol + tabaco  | 0 | 0 | cod_est"))
      felm(form, data = bb, exactDOF=TRUE)
    })
    
    reglist1[[length(reglist1) + 1]] <- reglist2
    reglist1[[length(reglist1) + 1]] <- reglist3
    reglist1[[length(reglist1) + 1]] <- reglist4
    meandf <- cbind(meandf, meandf, meandf_bb, meandf_bb)
    clinicdf <- cbind(clinicdf, clinicdf, clinicdf_bb, clinicdf_bb)
    
    stargazer(reglist1, title="Predicting Stroke and Heart Attack with Patient Characteristics", keep.stat=c("n", "adj.rsq"),
              add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
              dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:private_strokeacs_365_indicator",
              dep.var.labels= c('Stroke or Heart Attack $\\leq$12 Months (per 100)'),
              omit.table.layout = "n", font.size = "scriptsize",
              covariate.labels=c("\\textbf{Blood Pressure$\\geq$140}", "DM2 diagnosis","Male", "Log Age", "Log Total Chol.", "Log HDL Chol.", "Smoker"),
              out = paste0(leaf, "r2_cv_stroke_acs_365_h_over140.tex"))
    
    
    